1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s
; Mask is entirely poison: the <4 x i64> shuffle result stored to %ptr has no
; defined lane. The checks below expect nothing between the entry s_waitcnt and
; the s_setpc_b64 return (no inline asm def, no store).
7 define void @v_shuffle_v4i64_v3i64__u_u_u_u(ptr addrspace(1) inreg %ptr) {
8 ; GFX9-LABEL: v_shuffle_v4i64_v3i64__u_u_u_u:
10 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12 %vec0 = call <3 x i64> asm "; def $0", "=v"()
13 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> poison
14 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Mask <0, poison, poison, poison>: lane 0 is element 0 of %vec0 (an inline-asm
; defined <3 x i64>); the second shuffle operand is poison.
; The checks expect one asm def of v[0:5] followed by two global_store_dwordx4
; instructions (first at offset:16, then at the base address).
18 define void @v_shuffle_v4i64_v3i64__0_u_u_u(ptr addrspace(1) inreg %ptr) {
19 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_u_u_u:
21 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
23 ; GFX900-NEXT: ;;#ASMSTART
24 ; GFX900-NEXT: ; def v[0:5]
25 ; GFX900-NEXT: ;;#ASMEND
26 ; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
27 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
28 ; GFX900-NEXT: s_waitcnt vmcnt(0)
29 ; GFX900-NEXT: s_setpc_b64 s[30:31]
31 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_u_u_u:
33 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
35 ; GFX90A-NEXT: ;;#ASMSTART
36 ; GFX90A-NEXT: ; def v[0:5]
37 ; GFX90A-NEXT: ;;#ASMEND
38 ; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
39 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
40 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
41 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
43 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_u_u_u:
45 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
47 ; GFX940-NEXT: ;;#ASMSTART
48 ; GFX940-NEXT: ; def v[0:5]
49 ; GFX940-NEXT: ;;#ASMEND
50 ; GFX940-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1] offset:16 sc0 sc1
51 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
52 ; GFX940-NEXT: s_waitcnt vmcnt(0)
53 ; GFX940-NEXT: s_setpc_b64 s[30:31]
54 %vec0 = call <3 x i64> asm "; def $0", "=v"()
55 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
56 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Mask <1, poison, poison, poison>: lane 0 is element 1 of %vec0; the second
; shuffle operand is poison.
; The checks expect one asm def of v[0:5] and a single global_store_dwordx4 of
; v[2:5] with no register copies.
60 define void @v_shuffle_v4i64_v3i64__1_u_u_u(ptr addrspace(1) inreg %ptr) {
61 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_u_u_u:
63 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
65 ; GFX900-NEXT: ;;#ASMSTART
66 ; GFX900-NEXT: ; def v[0:5]
67 ; GFX900-NEXT: ;;#ASMEND
68 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
69 ; GFX900-NEXT: s_waitcnt vmcnt(0)
70 ; GFX900-NEXT: s_setpc_b64 s[30:31]
72 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_u_u_u:
74 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
76 ; GFX90A-NEXT: ;;#ASMSTART
77 ; GFX90A-NEXT: ; def v[0:5]
78 ; GFX90A-NEXT: ;;#ASMEND
79 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
80 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
81 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
83 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_u_u_u:
85 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
87 ; GFX940-NEXT: ;;#ASMSTART
88 ; GFX940-NEXT: ; def v[0:5]
89 ; GFX940-NEXT: ;;#ASMEND
90 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
91 ; GFX940-NEXT: s_waitcnt vmcnt(0)
92 ; GFX940-NEXT: s_setpc_b64 s[30:31]
93 %vec0 = call <3 x i64> asm "; def $0", "=v"()
94 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
95 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Mask <2, poison, poison, poison>: lane 0 is element 2 of %vec0; the second
; shuffle operand is poison.
; The checks expect v4/v5 to be copied into v0/v1 after the asm def, followed
; by a single global_store_dwordx4 of v[0:3].
99 define void @v_shuffle_v4i64_v3i64__2_u_u_u(ptr addrspace(1) inreg %ptr) {
100 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_u_u_u:
102 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103 ; GFX900-NEXT: ;;#ASMSTART
104 ; GFX900-NEXT: ; def v[0:5]
105 ; GFX900-NEXT: ;;#ASMEND
106 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
107 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
108 ; GFX900-NEXT: v_mov_b32_e32 v1, v5
109 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
110 ; GFX900-NEXT: s_waitcnt vmcnt(0)
111 ; GFX900-NEXT: s_setpc_b64 s[30:31]
113 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_u_u_u:
115 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116 ; GFX90A-NEXT: ;;#ASMSTART
117 ; GFX90A-NEXT: ; def v[0:5]
118 ; GFX90A-NEXT: ;;#ASMEND
119 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
120 ; GFX90A-NEXT: v_mov_b32_e32 v0, v4
121 ; GFX90A-NEXT: v_mov_b32_e32 v1, v5
122 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
123 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
124 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
126 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_u_u_u:
128 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129 ; GFX940-NEXT: ;;#ASMSTART
130 ; GFX940-NEXT: ; def v[0:5]
131 ; GFX940-NEXT: ;;#ASMEND
132 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
133 ; GFX940-NEXT: v_mov_b32_e32 v0, v4
134 ; GFX940-NEXT: v_mov_b32_e32 v1, v5
135 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
136 ; GFX940-NEXT: s_waitcnt vmcnt(0)
137 ; GFX940-NEXT: s_setpc_b64 s[30:31]
138 %vec0 = call <3 x i64> asm "; def $0", "=v"()
139 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
140 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Mask <3, poison, poison, poison>: with <3 x i64> operands, index 3 addresses
; element 0 of the second operand, which is poison in this test. No lane of the
; result is therefore defined, and the checks expect nothing between the entry
; s_waitcnt and the s_setpc_b64 return.
144 define void @v_shuffle_v4i64_v3i64__3_u_u_u(ptr addrspace(1) inreg %ptr) {
145 ; GFX9-LABEL: v_shuffle_v4i64_v3i64__3_u_u_u:
147 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148 ; GFX9-NEXT: s_setpc_b64 s[30:31]
149 %vec0 = call <3 x i64> asm "; def $0", "=v"()
150 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
151 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Mask <4, poison, poison, poison>: index 4 addresses element 1 of the second
; operand %vec1; no lane reads from %vec0.
; The checks expect only one asm def (v[0:5]) and a single
; global_store_dwordx4 of v[2:5] with no register copies.
155 define void @v_shuffle_v4i64_v3i64__4_u_u_u(ptr addrspace(1) inreg %ptr) {
156 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_u_u_u:
158 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
159 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
160 ; GFX900-NEXT: ;;#ASMSTART
161 ; GFX900-NEXT: ; def v[0:5]
162 ; GFX900-NEXT: ;;#ASMEND
163 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
164 ; GFX900-NEXT: s_waitcnt vmcnt(0)
165 ; GFX900-NEXT: s_setpc_b64 s[30:31]
167 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_u_u_u:
169 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
171 ; GFX90A-NEXT: ;;#ASMSTART
172 ; GFX90A-NEXT: ; def v[0:5]
173 ; GFX90A-NEXT: ;;#ASMEND
174 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
175 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
176 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
178 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_u_u_u:
180 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
182 ; GFX940-NEXT: ;;#ASMSTART
183 ; GFX940-NEXT: ; def v[0:5]
184 ; GFX940-NEXT: ;;#ASMEND
185 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
186 ; GFX940-NEXT: s_waitcnt vmcnt(0)
187 ; GFX940-NEXT: s_setpc_b64 s[30:31]
188 %vec0 = call <3 x i64> asm "; def $0", "=v"()
189 %vec1 = call <3 x i64> asm "; def $0", "=v"()
190 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison>
191 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Mask <5, poison, poison, poison>: index 5 addresses element 2 of the second
; operand %vec1; no lane reads from %vec0.
; The checks expect only one asm def (v[0:5]), a copy of v4/v5 into v0/v1, and
; a single global_store_dwordx4 of v[0:3].
195 define void @v_shuffle_v4i64_v3i64__5_u_u_u(ptr addrspace(1) inreg %ptr) {
196 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_u_u:
198 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199 ; GFX900-NEXT: ;;#ASMSTART
200 ; GFX900-NEXT: ; def v[0:5]
201 ; GFX900-NEXT: ;;#ASMEND
202 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
203 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
204 ; GFX900-NEXT: v_mov_b32_e32 v1, v5
205 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
206 ; GFX900-NEXT: s_waitcnt vmcnt(0)
207 ; GFX900-NEXT: s_setpc_b64 s[30:31]
209 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_u_u:
211 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212 ; GFX90A-NEXT: ;;#ASMSTART
213 ; GFX90A-NEXT: ; def v[0:5]
214 ; GFX90A-NEXT: ;;#ASMEND
215 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
216 ; GFX90A-NEXT: v_mov_b32_e32 v0, v4
217 ; GFX90A-NEXT: v_mov_b32_e32 v1, v5
218 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
219 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
220 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
222 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_u_u:
224 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
225 ; GFX940-NEXT: ;;#ASMSTART
226 ; GFX940-NEXT: ; def v[0:5]
227 ; GFX940-NEXT: ;;#ASMEND
228 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
229 ; GFX940-NEXT: v_mov_b32_e32 v0, v4
230 ; GFX940-NEXT: v_mov_b32_e32 v1, v5
231 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
232 ; GFX940-NEXT: s_waitcnt vmcnt(0)
233 ; GFX940-NEXT: s_setpc_b64 s[30:31]
234 %vec0 = call <3 x i64> asm "; def $0", "=v"()
235 %vec1 = call <3 x i64> asm "; def $0", "=v"()
236 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison>
237 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Mask <5, 0, poison, poison>: lane 0 is element 2 of %vec1 and lane 1 is
; element 0 of %vec0, so both inline-asm operands are live.
; The checks expect two asm defs (v[0:5], then v[2:7]), four v_mov copies that
; assemble v[2:5], and a single global_store_dwordx4 of v[2:5]. The gfx940
; expectations additionally contain an s_nop 0 after the second asm def.
241 define void @v_shuffle_v4i64_v3i64__5_0_u_u(ptr addrspace(1) inreg %ptr) {
242 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_u_u:
244 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245 ; GFX900-NEXT: ;;#ASMSTART
246 ; GFX900-NEXT: ; def v[0:5]
247 ; GFX900-NEXT: ;;#ASMEND
248 ; GFX900-NEXT: ;;#ASMSTART
249 ; GFX900-NEXT: ; def v[2:7]
250 ; GFX900-NEXT: ;;#ASMEND
251 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
252 ; GFX900-NEXT: v_mov_b32_e32 v2, v6
253 ; GFX900-NEXT: v_mov_b32_e32 v3, v7
254 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
255 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
256 ; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
257 ; GFX900-NEXT: s_waitcnt vmcnt(0)
258 ; GFX900-NEXT: s_setpc_b64 s[30:31]
260 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_u_u:
262 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263 ; GFX90A-NEXT: ;;#ASMSTART
264 ; GFX90A-NEXT: ; def v[0:5]
265 ; GFX90A-NEXT: ;;#ASMEND
266 ; GFX90A-NEXT: ;;#ASMSTART
267 ; GFX90A-NEXT: ; def v[2:7]
268 ; GFX90A-NEXT: ;;#ASMEND
269 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
270 ; GFX90A-NEXT: v_mov_b32_e32 v2, v6
271 ; GFX90A-NEXT: v_mov_b32_e32 v3, v7
272 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
273 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
274 ; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
275 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
276 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
278 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_u_u:
280 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281 ; GFX940-NEXT: ;;#ASMSTART
282 ; GFX940-NEXT: ; def v[0:5]
283 ; GFX940-NEXT: ;;#ASMEND
284 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
285 ; GFX940-NEXT: ;;#ASMSTART
286 ; GFX940-NEXT: ; def v[2:7]
287 ; GFX940-NEXT: ;;#ASMEND
288 ; GFX940-NEXT: s_nop 0
289 ; GFX940-NEXT: v_mov_b32_e32 v2, v6
290 ; GFX940-NEXT: v_mov_b32_e32 v3, v7
291 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
292 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
293 ; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
294 ; GFX940-NEXT: s_waitcnt vmcnt(0)
295 ; GFX940-NEXT: s_setpc_b64 s[30:31]
296 %vec0 = call <3 x i64> asm "; def $0", "=v"()
297 %vec1 = call <3 x i64> asm "; def $0", "=v"()
298 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 poison, i32 poison>
299 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Mask <5, 1, poison, poison>: lane 0 is element 2 of %vec1 and lane 1 is
; element 1 of %vec0, so both inline-asm operands are live.
; The checks expect two asm defs (v[0:5], then v[4:9]), a copy of v8/v9 into
; v0/v1, and a single global_store_dwordx4 of v[0:3]. The gfx940 expectations
; additionally contain an s_nop 0 after the second asm def.
303 define void @v_shuffle_v4i64_v3i64__5_1_u_u(ptr addrspace(1) inreg %ptr) {
304 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_u_u:
306 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
307 ; GFX900-NEXT: ;;#ASMSTART
308 ; GFX900-NEXT: ; def v[0:5]
309 ; GFX900-NEXT: ;;#ASMEND
310 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
311 ; GFX900-NEXT: ;;#ASMSTART
312 ; GFX900-NEXT: ; def v[4:9]
313 ; GFX900-NEXT: ;;#ASMEND
314 ; GFX900-NEXT: v_mov_b32_e32 v0, v8
315 ; GFX900-NEXT: v_mov_b32_e32 v1, v9
316 ; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
317 ; GFX900-NEXT: s_waitcnt vmcnt(0)
318 ; GFX900-NEXT: s_setpc_b64 s[30:31]
320 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_u_u:
322 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
323 ; GFX90A-NEXT: ;;#ASMSTART
324 ; GFX90A-NEXT: ; def v[0:5]
325 ; GFX90A-NEXT: ;;#ASMEND
326 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
327 ; GFX90A-NEXT: ;;#ASMSTART
328 ; GFX90A-NEXT: ; def v[4:9]
329 ; GFX90A-NEXT: ;;#ASMEND
330 ; GFX90A-NEXT: v_mov_b32_e32 v0, v8
331 ; GFX90A-NEXT: v_mov_b32_e32 v1, v9
332 ; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
333 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
334 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
336 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_u_u:
338 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339 ; GFX940-NEXT: ;;#ASMSTART
340 ; GFX940-NEXT: ; def v[0:5]
341 ; GFX940-NEXT: ;;#ASMEND
342 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
343 ; GFX940-NEXT: ;;#ASMSTART
344 ; GFX940-NEXT: ; def v[4:9]
345 ; GFX940-NEXT: ;;#ASMEND
346 ; GFX940-NEXT: s_nop 0
347 ; GFX940-NEXT: v_mov_b32_e32 v0, v8
348 ; GFX940-NEXT: v_mov_b32_e32 v1, v9
349 ; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
350 ; GFX940-NEXT: s_waitcnt vmcnt(0)
351 ; GFX940-NEXT: s_setpc_b64 s[30:31]
352 %vec0 = call <3 x i64> asm "; def $0", "=v"()
353 %vec1 = call <3 x i64> asm "; def $0", "=v"()
354 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 poison, i32 poison>
355 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Mask <5, 2, poison, poison>: lane 0 is element 2 of %vec1 and lane 1 is
; element 2 of %vec0, so both inline-asm operands are live.
; The checks expect two asm defs (v[0:5], then v[6:11]), a copy of v10/v11 into
; v2/v3, and a single global_store_dwordx4 of v[2:5]. The gfx940 expectations
; additionally contain an s_nop 0 after the second asm def.
359 define void @v_shuffle_v4i64_v3i64__5_2_u_u(ptr addrspace(1) inreg %ptr) {
360 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_u_u:
362 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
363 ; GFX900-NEXT: ;;#ASMSTART
364 ; GFX900-NEXT: ; def v[0:5]
365 ; GFX900-NEXT: ;;#ASMEND
366 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
367 ; GFX900-NEXT: ;;#ASMSTART
368 ; GFX900-NEXT: ; def v[6:11]
369 ; GFX900-NEXT: ;;#ASMEND
370 ; GFX900-NEXT: v_mov_b32_e32 v2, v10
371 ; GFX900-NEXT: v_mov_b32_e32 v3, v11
372 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
373 ; GFX900-NEXT: s_waitcnt vmcnt(0)
374 ; GFX900-NEXT: s_setpc_b64 s[30:31]
376 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_u_u:
378 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
379 ; GFX90A-NEXT: ;;#ASMSTART
380 ; GFX90A-NEXT: ; def v[0:5]
381 ; GFX90A-NEXT: ;;#ASMEND
382 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
383 ; GFX90A-NEXT: ;;#ASMSTART
384 ; GFX90A-NEXT: ; def v[6:11]
385 ; GFX90A-NEXT: ;;#ASMEND
386 ; GFX90A-NEXT: v_mov_b32_e32 v2, v10
387 ; GFX90A-NEXT: v_mov_b32_e32 v3, v11
388 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
389 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
390 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
392 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_u_u:
394 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395 ; GFX940-NEXT: ;;#ASMSTART
396 ; GFX940-NEXT: ; def v[0:5]
397 ; GFX940-NEXT: ;;#ASMEND
398 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
399 ; GFX940-NEXT: ;;#ASMSTART
400 ; GFX940-NEXT: ; def v[6:11]
401 ; GFX940-NEXT: ;;#ASMEND
402 ; GFX940-NEXT: s_nop 0
403 ; GFX940-NEXT: v_mov_b32_e32 v2, v10
404 ; GFX940-NEXT: v_mov_b32_e32 v3, v11
405 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
406 ; GFX940-NEXT: s_waitcnt vmcnt(0)
407 ; GFX940-NEXT: s_setpc_b64 s[30:31]
408 %vec0 = call <3 x i64> asm "; def $0", "=v"()
409 %vec1 = call <3 x i64> asm "; def $0", "=v"()
410 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 poison, i32 poison>
411 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Mask <5, 3, poison, poison>: lane 0 is element 2 and lane 1 is element 0 of
; the second operand %vec1; no lane reads from %vec0.
; The checks expect only one asm def (v[0:5]), four v_mov copies that assemble
; v[2:5] (v4/v5 first, then v0/v1), and a single global_store_dwordx4 of v[2:5].
415 define void @v_shuffle_v4i64_v3i64__5_3_u_u(ptr addrspace(1) inreg %ptr) {
416 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_u_u:
418 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
419 ; GFX900-NEXT: ;;#ASMSTART
420 ; GFX900-NEXT: ; def v[0:5]
421 ; GFX900-NEXT: ;;#ASMEND
422 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
423 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
424 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
425 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
426 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
427 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
428 ; GFX900-NEXT: s_waitcnt vmcnt(0)
429 ; GFX900-NEXT: s_setpc_b64 s[30:31]
431 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_u_u:
433 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434 ; GFX90A-NEXT: ;;#ASMSTART
435 ; GFX90A-NEXT: ; def v[0:5]
436 ; GFX90A-NEXT: ;;#ASMEND
437 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
438 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
439 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
440 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
441 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
442 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
443 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
444 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
446 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_u_u:
448 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449 ; GFX940-NEXT: ;;#ASMSTART
450 ; GFX940-NEXT: ; def v[0:5]
451 ; GFX940-NEXT: ;;#ASMEND
452 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
453 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
454 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
455 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
456 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
457 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
458 ; GFX940-NEXT: s_waitcnt vmcnt(0)
459 ; GFX940-NEXT: s_setpc_b64 s[30:31]
460 %vec0 = call <3 x i64> asm "; def $0", "=v"()
461 %vec1 = call <3 x i64> asm "; def $0", "=v"()
462 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 poison, i32 poison>
463 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Mask <5, 4, poison, poison>: lane 0 is element 2 and lane 1 is element 1 of
; the second operand %vec1; no lane reads from %vec0.
; The checks expect only one asm def (v[0:5]), a copy of v4/v5 into v0/v1, and
; a single global_store_dwordx4 of v[0:3].
467 define void @v_shuffle_v4i64_v3i64__5_4_u_u(ptr addrspace(1) inreg %ptr) {
468 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_u_u:
470 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
471 ; GFX900-NEXT: ;;#ASMSTART
472 ; GFX900-NEXT: ; def v[0:5]
473 ; GFX900-NEXT: ;;#ASMEND
474 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
475 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
476 ; GFX900-NEXT: v_mov_b32_e32 v1, v5
477 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
478 ; GFX900-NEXT: s_waitcnt vmcnt(0)
479 ; GFX900-NEXT: s_setpc_b64 s[30:31]
481 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_u_u:
483 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
484 ; GFX90A-NEXT: ;;#ASMSTART
485 ; GFX90A-NEXT: ; def v[0:5]
486 ; GFX90A-NEXT: ;;#ASMEND
487 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
488 ; GFX90A-NEXT: v_mov_b32_e32 v0, v4
489 ; GFX90A-NEXT: v_mov_b32_e32 v1, v5
490 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
491 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
492 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
494 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_u_u:
496 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
497 ; GFX940-NEXT: ;;#ASMSTART
498 ; GFX940-NEXT: ; def v[0:5]
499 ; GFX940-NEXT: ;;#ASMEND
500 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
501 ; GFX940-NEXT: v_mov_b32_e32 v0, v4
502 ; GFX940-NEXT: v_mov_b32_e32 v1, v5
503 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
504 ; GFX940-NEXT: s_waitcnt vmcnt(0)
505 ; GFX940-NEXT: s_setpc_b64 s[30:31]
506 %vec0 = call <3 x i64> asm "; def $0", "=v"()
507 %vec1 = call <3 x i64> asm "; def $0", "=v"()
508 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison>
509 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Mask <5, 5, poison, poison>: lanes 0 and 1 both take element 2 of the second
; operand %vec1; no lane reads from %vec0.
; The checks expect only one asm def (v[0:5]), a copy of v4/v5 into v2/v3, and
; a single global_store_dwordx4 of v[2:5].
513 define void @v_shuffle_v4i64_v3i64__5_5_u_u(ptr addrspace(1) inreg %ptr) {
514 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_u:
516 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
517 ; GFX900-NEXT: ;;#ASMSTART
518 ; GFX900-NEXT: ; def v[0:5]
519 ; GFX900-NEXT: ;;#ASMEND
520 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
521 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
522 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
523 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
524 ; GFX900-NEXT: s_waitcnt vmcnt(0)
525 ; GFX900-NEXT: s_setpc_b64 s[30:31]
527 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_u:
529 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
530 ; GFX90A-NEXT: ;;#ASMSTART
531 ; GFX90A-NEXT: ; def v[0:5]
532 ; GFX90A-NEXT: ;;#ASMEND
533 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
534 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
535 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
536 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
537 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
538 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
540 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_u:
542 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
543 ; GFX940-NEXT: ;;#ASMSTART
544 ; GFX940-NEXT: ; def v[0:5]
545 ; GFX940-NEXT: ;;#ASMEND
546 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
547 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
548 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
549 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
550 ; GFX940-NEXT: s_waitcnt vmcnt(0)
551 ; GFX940-NEXT: s_setpc_b64 s[30:31]
552 %vec0 = call <3 x i64> asm "; def $0", "=v"()
553 %vec1 = call <3 x i64> asm "; def $0", "=v"()
554 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison>
555 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Mask <5, 5, 0, poison>: lanes 0 and 1 take element 2 of %vec1 and lane 2
; takes element 0 of %vec0, so both inline-asm operands are live.
; The checks expect two asm defs (v[0:5], then v[2:7]), a copy of v6/v7 into
; v4/v5, and two global_store_dwordx4 instructions: v[0:3] at offset:16
; (carrying lane 2) and v[4:7] at the base address. In the gfx940 expectations
; the offset:16 store is placed before the two v_mov copies.
559 define void @v_shuffle_v4i64_v3i64__5_5_0_u(ptr addrspace(1) inreg %ptr) {
560 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_u:
562 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
563 ; GFX900-NEXT: ;;#ASMSTART
564 ; GFX900-NEXT: ; def v[0:5]
565 ; GFX900-NEXT: ;;#ASMEND
566 ; GFX900-NEXT: ;;#ASMSTART
567 ; GFX900-NEXT: ; def v[2:7]
568 ; GFX900-NEXT: ;;#ASMEND
569 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
570 ; GFX900-NEXT: v_mov_b32_e32 v4, v6
571 ; GFX900-NEXT: v_mov_b32_e32 v5, v7
572 ; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
573 ; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
574 ; GFX900-NEXT: s_waitcnt vmcnt(0)
575 ; GFX900-NEXT: s_setpc_b64 s[30:31]
577 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_u:
579 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
580 ; GFX90A-NEXT: ;;#ASMSTART
581 ; GFX90A-NEXT: ; def v[0:5]
582 ; GFX90A-NEXT: ;;#ASMEND
583 ; GFX90A-NEXT: ;;#ASMSTART
584 ; GFX90A-NEXT: ; def v[2:7]
585 ; GFX90A-NEXT: ;;#ASMEND
586 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
587 ; GFX90A-NEXT: v_mov_b32_e32 v4, v6
588 ; GFX90A-NEXT: v_mov_b32_e32 v5, v7
589 ; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
590 ; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
591 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
592 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
594 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_u:
596 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
597 ; GFX940-NEXT: ;;#ASMSTART
598 ; GFX940-NEXT: ; def v[0:5]
599 ; GFX940-NEXT: ;;#ASMEND
600 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
601 ; GFX940-NEXT: ;;#ASMSTART
602 ; GFX940-NEXT: ; def v[2:7]
603 ; GFX940-NEXT: ;;#ASMEND
604 ; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
605 ; GFX940-NEXT: v_mov_b32_e32 v4, v6
606 ; GFX940-NEXT: v_mov_b32_e32 v5, v7
607 ; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
608 ; GFX940-NEXT: s_waitcnt vmcnt(0)
609 ; GFX940-NEXT: s_setpc_b64 s[30:31]
610 %vec0 = call <3 x i64> asm "; def $0", "=v"()
611 %vec1 = call <3 x i64> asm "; def $0", "=v"()
612 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 poison>
613 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Mask <5, 5, 1, poison>: lanes 0 and 1 take element 2 of %vec1 and lane 2
; takes element 1 of %vec0, so both inline-asm operands are live.
; The checks expect two asm defs (v[0:5], then v[4:9]), a copy of v8/v9 into
; v6/v7, and two global_store_dwordx4 instructions: v[2:5] at offset:16
; (carrying lane 2) and v[6:9] at the base address. In the gfx940 expectations
; the offset:16 store is placed before the two v_mov copies.
617 define void @v_shuffle_v4i64_v3i64__5_5_1_u(ptr addrspace(1) inreg %ptr) {
618 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_u:
620 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
621 ; GFX900-NEXT: ;;#ASMSTART
622 ; GFX900-NEXT: ; def v[0:5]
623 ; GFX900-NEXT: ;;#ASMEND
624 ; GFX900-NEXT: ;;#ASMSTART
625 ; GFX900-NEXT: ; def v[4:9]
626 ; GFX900-NEXT: ;;#ASMEND
627 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
628 ; GFX900-NEXT: v_mov_b32_e32 v6, v8
629 ; GFX900-NEXT: v_mov_b32_e32 v7, v9
630 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
631 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
632 ; GFX900-NEXT: s_waitcnt vmcnt(0)
633 ; GFX900-NEXT: s_setpc_b64 s[30:31]
635 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_u:
637 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
638 ; GFX90A-NEXT: ;;#ASMSTART
639 ; GFX90A-NEXT: ; def v[0:5]
640 ; GFX90A-NEXT: ;;#ASMEND
641 ; GFX90A-NEXT: ;;#ASMSTART
642 ; GFX90A-NEXT: ; def v[4:9]
643 ; GFX90A-NEXT: ;;#ASMEND
644 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
645 ; GFX90A-NEXT: v_mov_b32_e32 v6, v8
646 ; GFX90A-NEXT: v_mov_b32_e32 v7, v9
647 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
648 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
649 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
650 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
652 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_u:
654 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
655 ; GFX940-NEXT: ;;#ASMSTART
656 ; GFX940-NEXT: ; def v[0:5]
657 ; GFX940-NEXT: ;;#ASMEND
658 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
659 ; GFX940-NEXT: ;;#ASMSTART
660 ; GFX940-NEXT: ; def v[4:9]
661 ; GFX940-NEXT: ;;#ASMEND
662 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
663 ; GFX940-NEXT: v_mov_b32_e32 v6, v8
664 ; GFX940-NEXT: v_mov_b32_e32 v7, v9
665 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
666 ; GFX940-NEXT: s_waitcnt vmcnt(0)
667 ; GFX940-NEXT: s_setpc_b64 s[30:31]
668 %vec0 = call <3 x i64> asm "; def $0", "=v"()
669 %vec1 = call <3 x i64> asm "; def $0", "=v"()
670 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 poison>
671 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Mask <5, 5, 2, poison>: lanes 0 and 1 take element 2 of %vec1 and lane 2
; takes element 2 of %vec0, so both inline-asm operands are live.
; The checks expect two asm defs (v[0:5], then v[6:11]), copies of v4/v5 into
; v0/v1 and of v10/v11 into v8/v9, and two global_store_dwordx4 instructions:
; v[0:3] at offset:16 (carrying lane 2) and v[8:11] at the base address.
675 define void @v_shuffle_v4i64_v3i64__5_5_2_u(ptr addrspace(1) inreg %ptr) {
676 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_u:
678 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
679 ; GFX900-NEXT: ;;#ASMSTART
680 ; GFX900-NEXT: ; def v[0:5]
681 ; GFX900-NEXT: ;;#ASMEND
682 ; GFX900-NEXT: ;;#ASMSTART
683 ; GFX900-NEXT: ; def v[6:11]
684 ; GFX900-NEXT: ;;#ASMEND
685 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
686 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
687 ; GFX900-NEXT: v_mov_b32_e32 v1, v5
688 ; GFX900-NEXT: v_mov_b32_e32 v8, v10
689 ; GFX900-NEXT: v_mov_b32_e32 v9, v11
690 ; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
691 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
692 ; GFX900-NEXT: s_waitcnt vmcnt(0)
693 ; GFX900-NEXT: s_setpc_b64 s[30:31]
695 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_u:
697 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
698 ; GFX90A-NEXT: ;;#ASMSTART
699 ; GFX90A-NEXT: ; def v[0:5]
700 ; GFX90A-NEXT: ;;#ASMEND
701 ; GFX90A-NEXT: ;;#ASMSTART
702 ; GFX90A-NEXT: ; def v[6:11]
703 ; GFX90A-NEXT: ;;#ASMEND
704 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
705 ; GFX90A-NEXT: v_mov_b32_e32 v0, v4
706 ; GFX90A-NEXT: v_mov_b32_e32 v1, v5
707 ; GFX90A-NEXT: v_mov_b32_e32 v8, v10
708 ; GFX90A-NEXT: v_mov_b32_e32 v9, v11
709 ; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
710 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
711 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
712 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
714 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_u:
716 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
717 ; GFX940-NEXT: ;;#ASMSTART
718 ; GFX940-NEXT: ; def v[0:5]
719 ; GFX940-NEXT: ;;#ASMEND
720 ; GFX940-NEXT: ;;#ASMSTART
721 ; GFX940-NEXT: ; def v[6:11]
722 ; GFX940-NEXT: ;;#ASMEND
723 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
724 ; GFX940-NEXT: v_mov_b32_e32 v0, v4
725 ; GFX940-NEXT: v_mov_b32_e32 v1, v5
726 ; GFX940-NEXT: v_mov_b32_e32 v8, v10
727 ; GFX940-NEXT: v_mov_b32_e32 v9, v11
728 ; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1
729 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
730 ; GFX940-NEXT: s_waitcnt vmcnt(0)
731 ; GFX940-NEXT: s_setpc_b64 s[30:31]
732 %vec0 = call <3 x i64> asm "; def $0", "=v"()
733 %vec1 = call <3 x i64> asm "; def $0", "=v"()
734 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 poison>
735 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Mask <5, 5, 3, poison>: lanes 0 and 1 take element 2 and lane 2 takes
; element 0 of the second operand %vec1; no lane reads from %vec0.
; The checks expect only one asm def (v[0:5]), a global_store_dwordx4 of v[0:3]
; at offset:16, an s_nop, a copy of v4/v5 into v2/v3, and a second
; global_store_dwordx4 of v[2:5] at the base address. The s_nop operand is 0 in
; the gfx900/gfx90a expectations and 1 in the gfx940 expectations.
739 define void @v_shuffle_v4i64_v3i64__5_5_3_u(ptr addrspace(1) inreg %ptr) {
740 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_u:
742 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
743 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
744 ; GFX900-NEXT: ;;#ASMSTART
745 ; GFX900-NEXT: ; def v[0:5]
746 ; GFX900-NEXT: ;;#ASMEND
747 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
748 ; GFX900-NEXT: s_nop 0
749 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
750 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
751 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
752 ; GFX900-NEXT: s_waitcnt vmcnt(0)
753 ; GFX900-NEXT: s_setpc_b64 s[30:31]
755 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_u:
757 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
758 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
759 ; GFX90A-NEXT: ;;#ASMSTART
760 ; GFX90A-NEXT: ; def v[0:5]
761 ; GFX90A-NEXT: ;;#ASMEND
762 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
763 ; GFX90A-NEXT: s_nop 0
764 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
765 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
766 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
767 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
768 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
770 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_u:
772 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
773 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
774 ; GFX940-NEXT: ;;#ASMSTART
775 ; GFX940-NEXT: ; def v[0:5]
776 ; GFX940-NEXT: ;;#ASMEND
777 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
778 ; GFX940-NEXT: s_nop 1
779 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
780 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
781 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
782 ; GFX940-NEXT: s_waitcnt vmcnt(0)
783 ; GFX940-NEXT: s_setpc_b64 s[30:31]
784 %vec0 = call <3 x i64> asm "; def $0", "=v"()
785 %vec1 = call <3 x i64> asm "; def $0", "=v"()
786 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 poison>
787 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Mask <5, 5, 4, poison>: lanes 0 and 1 take element 2 and lane 2 takes
; element 1 of the second operand %vec1; no lane reads from %vec0.
; The checks expect only one asm def (v[0:5]), a global_store_dwordx4 of v[2:5]
; at offset:16, an s_nop, a copy of v4/v5 into v2/v3, and a second
; global_store_dwordx4 of v[2:5] at the base address. The s_nop operand is 0 in
; the gfx900/gfx90a expectations and 1 in the gfx940 expectations.
791 define void @v_shuffle_v4i64_v3i64__5_5_4_u(ptr addrspace(1) inreg %ptr) {
792 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_u:
794 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
795 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
796 ; GFX900-NEXT: ;;#ASMSTART
797 ; GFX900-NEXT: ; def v[0:5]
798 ; GFX900-NEXT: ;;#ASMEND
799 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
800 ; GFX900-NEXT: s_nop 0
801 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
802 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
803 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
804 ; GFX900-NEXT: s_waitcnt vmcnt(0)
805 ; GFX900-NEXT: s_setpc_b64 s[30:31]
807 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_u:
809 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
810 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
811 ; GFX90A-NEXT: ;;#ASMSTART
812 ; GFX90A-NEXT: ; def v[0:5]
813 ; GFX90A-NEXT: ;;#ASMEND
814 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
815 ; GFX90A-NEXT: s_nop 0
816 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
817 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
818 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
819 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
820 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
822 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_u:
824 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
825 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
826 ; GFX940-NEXT: ;;#ASMSTART
827 ; GFX940-NEXT: ; def v[0:5]
828 ; GFX940-NEXT: ;;#ASMEND
829 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
830 ; GFX940-NEXT: s_nop 1
831 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
832 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
833 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
834 ; GFX940-NEXT: s_waitcnt vmcnt(0)
835 ; GFX940-NEXT: s_setpc_b64 s[30:31]
836 %vec0 = call <3 x i64> asm "; def $0", "=v"()
837 %vec1 = call <3 x i64> asm "; def $0", "=v"()
838 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 poison>
839 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two asm-defined <3 x i64> vectors into a <4 x i64> using mask
; <5, 5, 5, poison>. Index 5 is element 2 of %vec1, so only %vec1 contributes
; and the expected output has a single "def v[0:5]".
; The check lines below are autogenerated; regenerate them instead of hand-editing.
843 define void @v_shuffle_v4i64_v3i64__5_5_5_u(ptr addrspace(1) inreg %ptr) {
844 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_u:
846 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
847 ; GFX900-NEXT: ;;#ASMSTART
848 ; GFX900-NEXT: ; def v[0:5]
849 ; GFX900-NEXT: ;;#ASMEND
850 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
851 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
852 ; GFX900-NEXT: v_mov_b32_e32 v1, v5
853 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
854 ; GFX900-NEXT: s_nop 0
855 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
856 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
857 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
858 ; GFX900-NEXT: s_waitcnt vmcnt(0)
859 ; GFX900-NEXT: s_setpc_b64 s[30:31]
861 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_u:
863 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
864 ; GFX90A-NEXT: ;;#ASMSTART
865 ; GFX90A-NEXT: ; def v[0:5]
866 ; GFX90A-NEXT: ;;#ASMEND
867 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
868 ; GFX90A-NEXT: v_mov_b32_e32 v0, v4
869 ; GFX90A-NEXT: v_mov_b32_e32 v1, v5
870 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
871 ; GFX90A-NEXT: s_nop 0
872 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
873 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
874 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
875 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
876 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
878 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_u:
880 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
881 ; GFX940-NEXT: ;;#ASMSTART
882 ; GFX940-NEXT: ; def v[0:5]
883 ; GFX940-NEXT: ;;#ASMEND
884 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
885 ; GFX940-NEXT: v_mov_b32_e32 v0, v4
886 ; GFX940-NEXT: v_mov_b32_e32 v1, v5
887 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
888 ; GFX940-NEXT: s_nop 1
889 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
890 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
891 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
892 ; GFX940-NEXT: s_waitcnt vmcnt(0)
893 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Result lanes: { vec1[2], vec1[2], vec1[2], poison }; the 32-byte store is
; expected to be emitted as two dwordx4 stores (offset 16 and offset 0).
894 %vec0 = call <3 x i64> asm "; def $0", "=v"()
895 %vec1 = call <3 x i64> asm "; def $0", "=v"()
896 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 poison>
897 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two asm-defined <3 x i64> vectors into a <4 x i64> using mask
; <5, 5, 5, 0>: lanes 0-2 take element 2 of %vec1 and lane 3 takes element 0
; of %vec0. Both inputs are used, so the expected output shows two asm defs
; (v[0:5] and v[2:7]).
; The check lines below are autogenerated; regenerate them instead of hand-editing.
901 define void @v_shuffle_v4i64_v3i64__5_5_5_0(ptr addrspace(1) inreg %ptr) {
902 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_0:
904 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
905 ; GFX900-NEXT: ;;#ASMSTART
906 ; GFX900-NEXT: ; def v[0:5]
907 ; GFX900-NEXT: ;;#ASMEND
908 ; GFX900-NEXT: ;;#ASMSTART
909 ; GFX900-NEXT: ; def v[2:7]
910 ; GFX900-NEXT: ;;#ASMEND
911 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
912 ; GFX900-NEXT: v_mov_b32_e32 v2, v6
913 ; GFX900-NEXT: v_mov_b32_e32 v3, v7
914 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
915 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
916 ; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
917 ; GFX900-NEXT: s_nop 0
918 ; GFX900-NEXT: v_mov_b32_e32 v4, v6
919 ; GFX900-NEXT: v_mov_b32_e32 v5, v7
920 ; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
921 ; GFX900-NEXT: s_waitcnt vmcnt(0)
922 ; GFX900-NEXT: s_setpc_b64 s[30:31]
924 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_0:
926 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
927 ; GFX90A-NEXT: ;;#ASMSTART
928 ; GFX90A-NEXT: ; def v[0:5]
929 ; GFX90A-NEXT: ;;#ASMEND
930 ; GFX90A-NEXT: ;;#ASMSTART
931 ; GFX90A-NEXT: ; def v[2:7]
932 ; GFX90A-NEXT: ;;#ASMEND
933 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
934 ; GFX90A-NEXT: v_mov_b32_e32 v2, v6
935 ; GFX90A-NEXT: v_mov_b32_e32 v3, v7
936 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
937 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
938 ; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
939 ; GFX90A-NEXT: s_nop 0
940 ; GFX90A-NEXT: v_mov_b32_e32 v4, v6
941 ; GFX90A-NEXT: v_mov_b32_e32 v5, v7
942 ; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
943 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
944 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
946 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_0:
948 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
949 ; GFX940-NEXT: ;;#ASMSTART
950 ; GFX940-NEXT: ; def v[0:5]
951 ; GFX940-NEXT: ;;#ASMEND
952 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
953 ; GFX940-NEXT: ;;#ASMSTART
954 ; GFX940-NEXT: ; def v[2:7]
955 ; GFX940-NEXT: ;;#ASMEND
956 ; GFX940-NEXT: s_nop 0
957 ; GFX940-NEXT: v_mov_b32_e32 v2, v6
958 ; GFX940-NEXT: v_mov_b32_e32 v3, v7
959 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
960 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
961 ; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1
962 ; GFX940-NEXT: s_nop 1
963 ; GFX940-NEXT: v_mov_b32_e32 v4, v6
964 ; GFX940-NEXT: v_mov_b32_e32 v5, v7
965 ; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
966 ; GFX940-NEXT: s_waitcnt vmcnt(0)
967 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Result lanes: { vec1[2], vec1[2], vec1[2], vec0[0] }; the 32-byte store is
; expected to be emitted as two dwordx4 stores (offset 16 and offset 0).
968 %vec0 = call <3 x i64> asm "; def $0", "=v"()
969 %vec1 = call <3 x i64> asm "; def $0", "=v"()
970 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 0>
971 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two asm-defined <3 x i64> vectors into a <4 x i64> using mask
; <5, 5, 5, 1>: lanes 0-2 take element 2 of %vec1 and lane 3 takes element 1
; of %vec0. Both inputs are used, so the expected output shows two asm defs
; (v[0:5] and v[4:9]).
; The check lines below are autogenerated; regenerate them instead of hand-editing.
975 define void @v_shuffle_v4i64_v3i64__5_5_5_1(ptr addrspace(1) inreg %ptr) {
976 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_1:
978 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
979 ; GFX900-NEXT: ;;#ASMSTART
980 ; GFX900-NEXT: ; def v[0:5]
981 ; GFX900-NEXT: ;;#ASMEND
982 ; GFX900-NEXT: ;;#ASMSTART
983 ; GFX900-NEXT: ; def v[4:9]
984 ; GFX900-NEXT: ;;#ASMEND
985 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
986 ; GFX900-NEXT: v_mov_b32_e32 v0, v8
987 ; GFX900-NEXT: v_mov_b32_e32 v1, v9
988 ; GFX900-NEXT: v_mov_b32_e32 v6, v8
989 ; GFX900-NEXT: v_mov_b32_e32 v7, v9
990 ; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
991 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
992 ; GFX900-NEXT: s_waitcnt vmcnt(0)
993 ; GFX900-NEXT: s_setpc_b64 s[30:31]
995 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_1:
997 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
998 ; GFX90A-NEXT: ;;#ASMSTART
999 ; GFX90A-NEXT: ; def v[0:5]
1000 ; GFX90A-NEXT: ;;#ASMEND
1001 ; GFX90A-NEXT: ;;#ASMSTART
1002 ; GFX90A-NEXT: ; def v[4:9]
1003 ; GFX90A-NEXT: ;;#ASMEND
1004 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
1005 ; GFX90A-NEXT: v_mov_b32_e32 v0, v8
1006 ; GFX90A-NEXT: v_mov_b32_e32 v1, v9
1007 ; GFX90A-NEXT: v_mov_b32_e32 v6, v8
1008 ; GFX90A-NEXT: v_mov_b32_e32 v7, v9
1009 ; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
1010 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
1011 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1012 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1014 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_1:
1016 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1017 ; GFX940-NEXT: ;;#ASMSTART
1018 ; GFX940-NEXT: ; def v[0:5]
1019 ; GFX940-NEXT: ;;#ASMEND
1020 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
1021 ; GFX940-NEXT: ;;#ASMSTART
1022 ; GFX940-NEXT: ; def v[4:9]
1023 ; GFX940-NEXT: ;;#ASMEND
1024 ; GFX940-NEXT: s_nop 0
1025 ; GFX940-NEXT: v_mov_b32_e32 v0, v8
1026 ; GFX940-NEXT: v_mov_b32_e32 v1, v9
1027 ; GFX940-NEXT: v_mov_b32_e32 v6, v8
1028 ; GFX940-NEXT: v_mov_b32_e32 v7, v9
1029 ; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1
1030 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
1031 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1032 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Result lanes: { vec1[2], vec1[2], vec1[2], vec0[1] }; the 32-byte store is
; expected to be emitted as two dwordx4 stores (offset 16 and offset 0).
1033 %vec0 = call <3 x i64> asm "; def $0", "=v"()
1034 %vec1 = call <3 x i64> asm "; def $0", "=v"()
1035 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 1>
1036 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two asm-defined <3 x i64> vectors into a <4 x i64> using mask
; <5, 5, 5, 2>: lanes 0-2 take element 2 of %vec1 and lane 3 takes element 2
; of %vec0. Both inputs are used, so the expected output shows two asm defs
; (v[0:5] and v[6:11]).
; The check lines below are autogenerated; regenerate them instead of hand-editing.
1040 define void @v_shuffle_v4i64_v3i64__5_5_5_2(ptr addrspace(1) inreg %ptr) {
1041 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_2:
1043 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1044 ; GFX900-NEXT: ;;#ASMSTART
1045 ; GFX900-NEXT: ; def v[0:5]
1046 ; GFX900-NEXT: ;;#ASMEND
1047 ; GFX900-NEXT: ;;#ASMSTART
1048 ; GFX900-NEXT: ; def v[6:11]
1049 ; GFX900-NEXT: ;;#ASMEND
1050 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
1051 ; GFX900-NEXT: v_mov_b32_e32 v2, v10
1052 ; GFX900-NEXT: v_mov_b32_e32 v3, v11
1053 ; GFX900-NEXT: v_mov_b32_e32 v8, v10
1054 ; GFX900-NEXT: v_mov_b32_e32 v9, v11
1055 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
1056 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
1057 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1058 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1060 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_2:
1062 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1063 ; GFX90A-NEXT: ;;#ASMSTART
1064 ; GFX90A-NEXT: ; def v[0:5]
1065 ; GFX90A-NEXT: ;;#ASMEND
1066 ; GFX90A-NEXT: ;;#ASMSTART
1067 ; GFX90A-NEXT: ; def v[6:11]
1068 ; GFX90A-NEXT: ;;#ASMEND
1069 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
1070 ; GFX90A-NEXT: v_mov_b32_e32 v2, v10
1071 ; GFX90A-NEXT: v_mov_b32_e32 v3, v11
1072 ; GFX90A-NEXT: v_mov_b32_e32 v8, v10
1073 ; GFX90A-NEXT: v_mov_b32_e32 v9, v11
1074 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
1075 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
1076 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1077 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1079 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_2:
1081 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1082 ; GFX940-NEXT: ;;#ASMSTART
1083 ; GFX940-NEXT: ; def v[0:5]
1084 ; GFX940-NEXT: ;;#ASMEND
1085 ; GFX940-NEXT: ;;#ASMSTART
1086 ; GFX940-NEXT: ; def v[6:11]
1087 ; GFX940-NEXT: ;;#ASMEND
1088 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
1089 ; GFX940-NEXT: v_mov_b32_e32 v2, v10
1090 ; GFX940-NEXT: v_mov_b32_e32 v3, v11
1091 ; GFX940-NEXT: v_mov_b32_e32 v8, v10
1092 ; GFX940-NEXT: v_mov_b32_e32 v9, v11
1093 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
1094 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
1095 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1096 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Result lanes: { vec1[2], vec1[2], vec1[2], vec0[2] }; the 32-byte store is
; expected to be emitted as two dwordx4 stores (offset 16 and offset 0).
1097 %vec0 = call <3 x i64> asm "; def $0", "=v"()
1098 %vec1 = call <3 x i64> asm "; def $0", "=v"()
1099 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 2>
1100 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two asm-defined <3 x i64> vectors into a <4 x i64> using mask
; <5, 5, 5, 3>: lanes 0-2 take element 2 of %vec1 and lane 3 takes element 0
; of %vec1. Only %vec1 contributes, so the expected output has a single
; "def v[0:5]".
; The check lines below are autogenerated; regenerate them instead of hand-editing.
1104 define void @v_shuffle_v4i64_v3i64__5_5_5_3(ptr addrspace(1) inreg %ptr) {
1105 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_3:
1107 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1108 ; GFX900-NEXT: ;;#ASMSTART
1109 ; GFX900-NEXT: ; def v[0:5]
1110 ; GFX900-NEXT: ;;#ASMEND
1111 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
1112 ; GFX900-NEXT: v_mov_b32_e32 v6, v4
1113 ; GFX900-NEXT: v_mov_b32_e32 v7, v5
1114 ; GFX900-NEXT: v_mov_b32_e32 v8, v0
1115 ; GFX900-NEXT: v_mov_b32_e32 v9, v1
1116 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
1117 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
1118 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
1119 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
1120 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1121 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1123 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_3:
1125 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1126 ; GFX90A-NEXT: ;;#ASMSTART
1127 ; GFX90A-NEXT: ; def v[0:5]
1128 ; GFX90A-NEXT: ;;#ASMEND
1129 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
1130 ; GFX90A-NEXT: v_mov_b32_e32 v6, v4
1131 ; GFX90A-NEXT: v_mov_b32_e32 v7, v5
1132 ; GFX90A-NEXT: v_mov_b32_e32 v8, v0
1133 ; GFX90A-NEXT: v_mov_b32_e32 v9, v1
1134 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
1135 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
1136 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
1137 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
1138 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1139 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1141 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_3:
1143 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1144 ; GFX940-NEXT: ;;#ASMSTART
1145 ; GFX940-NEXT: ; def v[0:5]
1146 ; GFX940-NEXT: ;;#ASMEND
1147 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
1148 ; GFX940-NEXT: v_mov_b32_e32 v6, v4
1149 ; GFX940-NEXT: v_mov_b32_e32 v7, v5
1150 ; GFX940-NEXT: v_mov_b32_e32 v8, v0
1151 ; GFX940-NEXT: v_mov_b32_e32 v9, v1
1152 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
1153 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
1154 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
1155 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1
1156 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1157 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Result lanes: { vec1[2], vec1[2], vec1[2], vec1[0] }; the 32-byte store is
; expected to be emitted as two dwordx4 stores (offset 16 and offset 0).
1158 %vec0 = call <3 x i64> asm "; def $0", "=v"()
1159 %vec1 = call <3 x i64> asm "; def $0", "=v"()
1160 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 3>
1161 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two asm-defined <3 x i64> vectors into a <4 x i64> using mask
; <5, 5, 5, 4>: lanes 0-2 take element 2 of %vec1 and lane 3 takes element 1
; of %vec1. Only %vec1 contributes, so the expected output has a single
; "def v[0:5]".
; The check lines below are autogenerated; regenerate them instead of hand-editing.
1165 define void @v_shuffle_v4i64_v3i64__5_5_5_4(ptr addrspace(1) inreg %ptr) {
1166 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_4:
1168 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1169 ; GFX900-NEXT: ;;#ASMSTART
1170 ; GFX900-NEXT: ; def v[0:5]
1171 ; GFX900-NEXT: ;;#ASMEND
1172 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
1173 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
1174 ; GFX900-NEXT: v_mov_b32_e32 v1, v5
1175 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1176 ; GFX900-NEXT: s_nop 0
1177 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
1178 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
1179 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
1180 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1181 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1183 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_4:
1185 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1186 ; GFX90A-NEXT: ;;#ASMSTART
1187 ; GFX90A-NEXT: ; def v[0:5]
1188 ; GFX90A-NEXT: ;;#ASMEND
1189 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
1190 ; GFX90A-NEXT: v_mov_b32_e32 v0, v4
1191 ; GFX90A-NEXT: v_mov_b32_e32 v1, v5
1192 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1193 ; GFX90A-NEXT: s_nop 0
1194 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
1195 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
1196 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
1197 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1198 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1200 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_4:
1202 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1203 ; GFX940-NEXT: ;;#ASMSTART
1204 ; GFX940-NEXT: ; def v[0:5]
1205 ; GFX940-NEXT: ;;#ASMEND
1206 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
1207 ; GFX940-NEXT: v_mov_b32_e32 v0, v4
1208 ; GFX940-NEXT: v_mov_b32_e32 v1, v5
1209 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
1210 ; GFX940-NEXT: s_nop 1
1211 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
1212 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
1213 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
1214 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1215 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Result lanes: { vec1[2], vec1[2], vec1[2], vec1[1] }; the 32-byte store is
; expected to be emitted as two dwordx4 stores (offset 16 and offset 0).
1216 %vec0 = call <3 x i64> asm "; def $0", "=v"()
1217 %vec1 = call <3 x i64> asm "; def $0", "=v"()
1218 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 4>
1219 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two asm-defined <3 x i64> vectors into a <4 x i64> using mask
; <5, 5, 5, 5>, i.e. a splat of element 2 of %vec1. Only %vec1 contributes,
; so the expected output has a single "def v[0:5]" and both stores reuse the
; same v[2:5] tuple.
; The check lines below are autogenerated; regenerate them instead of hand-editing.
1223 define void @v_shuffle_v4i64_v3i64__5_5_5_5(ptr addrspace(1) inreg %ptr) {
1224 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_5:
1226 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1227 ; GFX900-NEXT: ;;#ASMSTART
1228 ; GFX900-NEXT: ; def v[0:5]
1229 ; GFX900-NEXT: ;;#ASMEND
1230 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
1231 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
1232 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
1233 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
1234 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
1235 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1236 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1238 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_5:
1240 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1241 ; GFX90A-NEXT: ;;#ASMSTART
1242 ; GFX90A-NEXT: ; def v[0:5]
1243 ; GFX90A-NEXT: ;;#ASMEND
1244 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
1245 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
1246 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
1247 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
1248 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
1249 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1250 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1252 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_5:
1254 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1255 ; GFX940-NEXT: ;;#ASMSTART
1256 ; GFX940-NEXT: ; def v[0:5]
1257 ; GFX940-NEXT: ;;#ASMEND
1258 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
1259 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
1260 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
1261 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
1262 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
1263 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1264 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Result lanes: { vec1[2], vec1[2], vec1[2], vec1[2] }; the 32-byte store is
; expected to be emitted as two dwordx4 stores (offset 16 and offset 0).
1265 %vec0 = call <3 x i64> asm "; def $0", "=v"()
1266 %vec1 = call <3 x i64> asm "; def $0", "=v"()
1267 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
1268 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Single-input shuffle (second operand is poison) of an asm-defined <3 x i64>
; into a <4 x i64> using mask <poison, 0, 0, 0>: lanes 1-3 take element 0 of
; %vec0 and lane 0 is left undefined. Both expected stores reuse v[0:3].
; The check lines below are autogenerated; regenerate them instead of hand-editing.
1272 define void @v_shuffle_v4i64_v3i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
1273 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__u_0_0_0:
1275 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1276 ; GFX900-NEXT: ;;#ASMSTART
1277 ; GFX900-NEXT: ; def v[0:5]
1278 ; GFX900-NEXT: ;;#ASMEND
1279 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
1280 ; GFX900-NEXT: v_mov_b32_e32 v2, v0
1281 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
1282 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1283 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
1284 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1285 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1287 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__u_0_0_0:
1289 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1290 ; GFX90A-NEXT: ;;#ASMSTART
1291 ; GFX90A-NEXT: ; def v[0:5]
1292 ; GFX90A-NEXT: ;;#ASMEND
1293 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
1294 ; GFX90A-NEXT: v_mov_b32_e32 v2, v0
1295 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
1296 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1297 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
1298 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1299 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1301 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_0_0_0:
1303 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1304 ; GFX940-NEXT: ;;#ASMSTART
1305 ; GFX940-NEXT: ; def v[0:5]
1306 ; GFX940-NEXT: ;;#ASMEND
1307 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
1308 ; GFX940-NEXT: v_mov_b32_e32 v2, v0
1309 ; GFX940-NEXT: v_mov_b32_e32 v3, v1
1310 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
1311 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
1312 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1313 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Result lanes: { poison, vec0[0], vec0[0], vec0[0] }; the 32-byte store is
; expected to be emitted as two dwordx4 stores (offset 16 and offset 0).
1314 %vec0 = call <3 x i64> asm "; def $0", "=v"()
1315 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
1316 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Single-input shuffle (second operand is poison) of an asm-defined <3 x i64>
; into a <4 x i64> using an all-zero mask, i.e. a splat of element 0 of %vec0.
; Both expected stores reuse v[0:3] after copying v[0:1] into v[2:3].
; The check lines below are autogenerated; regenerate them instead of hand-editing.
1320 define void @v_shuffle_v4i64_v3i64__0_0_0_0(ptr addrspace(1) inreg %ptr) {
1321 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_0_0_0:
1323 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1324 ; GFX900-NEXT: ;;#ASMSTART
1325 ; GFX900-NEXT: ; def v[0:5]
1326 ; GFX900-NEXT: ;;#ASMEND
1327 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
1328 ; GFX900-NEXT: v_mov_b32_e32 v2, v0
1329 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
1330 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1331 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
1332 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1333 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1335 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_0_0_0:
1337 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1338 ; GFX90A-NEXT: ;;#ASMSTART
1339 ; GFX90A-NEXT: ; def v[0:5]
1340 ; GFX90A-NEXT: ;;#ASMEND
1341 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
1342 ; GFX90A-NEXT: v_mov_b32_e32 v2, v0
1343 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
1344 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1345 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
1346 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1347 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1349 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_0_0_0:
1351 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1352 ; GFX940-NEXT: ;;#ASMSTART
1353 ; GFX940-NEXT: ; def v[0:5]
1354 ; GFX940-NEXT: ;;#ASMEND
1355 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
1356 ; GFX940-NEXT: v_mov_b32_e32 v2, v0
1357 ; GFX940-NEXT: v_mov_b32_e32 v3, v1
1358 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
1359 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
1360 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1361 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; zeroinitializer mask == <0, 0, 0, 0>: every result lane is vec0[0]; the
; 32-byte store is expected to be emitted as two dwordx4 stores.
1362 %vec0 = call <3 x i64> asm "; def $0", "=v"()
1363 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> zeroinitializer
1364 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Single-input shuffle (second operand is poison) of an asm-defined <3 x i64>
; into a <4 x i64> using mask <1, 0, 0, 0>: lane 0 takes element 1 of %vec0
; and lanes 1-3 take element 0.
; The check lines below are autogenerated; regenerate them instead of hand-editing.
1368 define void @v_shuffle_v4i64_v3i64__1_0_0_0(ptr addrspace(1) inreg %ptr) {
1369 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_0_0_0:
1371 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1372 ; GFX900-NEXT: ;;#ASMSTART
1373 ; GFX900-NEXT: ; def v[0:5]
1374 ; GFX900-NEXT: ;;#ASMEND
1375 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
1376 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
1377 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
1378 ; GFX900-NEXT: v_mov_b32_e32 v6, v0
1379 ; GFX900-NEXT: v_mov_b32_e32 v7, v1
1380 ; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
1381 ; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
1382 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1383 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1385 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_0_0_0:
1387 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1388 ; GFX90A-NEXT: ;;#ASMSTART
1389 ; GFX90A-NEXT: ; def v[0:5]
1390 ; GFX90A-NEXT: ;;#ASMEND
1391 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
1392 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
1393 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
1394 ; GFX90A-NEXT: v_mov_b32_e32 v6, v0
1395 ; GFX90A-NEXT: v_mov_b32_e32 v7, v1
1396 ; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
1397 ; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
1398 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1399 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1401 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_0_0_0:
1403 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1404 ; GFX940-NEXT: ;;#ASMSTART
1405 ; GFX940-NEXT: ; def v[0:5]
1406 ; GFX940-NEXT: ;;#ASMEND
1407 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
1408 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
1409 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
1410 ; GFX940-NEXT: v_mov_b32_e32 v6, v0
1411 ; GFX940-NEXT: v_mov_b32_e32 v7, v1
1412 ; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1
1413 ; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
1414 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1415 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Result lanes: { vec0[1], vec0[0], vec0[0], vec0[0] }; the 32-byte store is
; expected to be emitted as two dwordx4 stores (offset 16 and offset 0).
1416 %vec0 = call <3 x i64> asm "; def $0", "=v"()
1417 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
1418 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Single-input shuffle (second operand is poison) of an asm-defined <3 x i64>
; into a <4 x i64> using mask <2, 0, 0, 0>: lane 0 takes element 2 of %vec0
; and lanes 1-3 take element 0.
; The check lines below are autogenerated; regenerate them instead of hand-editing.
1422 define void @v_shuffle_v4i64_v3i64__2_0_0_0(ptr addrspace(1) inreg %ptr) {
1423 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_0_0_0:
1425 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1426 ; GFX900-NEXT: ;;#ASMSTART
1427 ; GFX900-NEXT: ; def v[0:5]
1428 ; GFX900-NEXT: ;;#ASMEND
1429 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
1430 ; GFX900-NEXT: v_mov_b32_e32 v2, v0
1431 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
1432 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1433 ; GFX900-NEXT: s_nop 0
1434 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
1435 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
1436 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
1437 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
1438 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
1439 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1440 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1442 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_0_0_0:
1444 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1445 ; GFX90A-NEXT: ;;#ASMSTART
1446 ; GFX90A-NEXT: ; def v[0:5]
1447 ; GFX90A-NEXT: ;;#ASMEND
1448 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
1449 ; GFX90A-NEXT: v_mov_b32_e32 v2, v0
1450 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
1451 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1452 ; GFX90A-NEXT: s_nop 0
1453 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
1454 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
1455 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
1456 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
1457 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
1458 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1459 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1461 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_0_0_0:
1463 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1464 ; GFX940-NEXT: ;;#ASMSTART
1465 ; GFX940-NEXT: ; def v[0:5]
1466 ; GFX940-NEXT: ;;#ASMEND
1467 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
1468 ; GFX940-NEXT: v_mov_b32_e32 v2, v0
1469 ; GFX940-NEXT: v_mov_b32_e32 v3, v1
1470 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
1471 ; GFX940-NEXT: s_nop 1
1472 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
1473 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
1474 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
1475 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
1476 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
1477 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1478 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Result lanes: { vec0[2], vec0[0], vec0[0], vec0[0] }; the 32-byte store is
; expected to be emitted as two dwordx4 stores (offset 16 and offset 0).
1479 %vec0 = call <3 x i64> asm "; def $0", "=v"()
1480 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
1481 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Single-input shuffle of an asm-defined <3 x i64> into a <4 x i64> using mask
; <3, 0, 0, 0>. Index 3 names element 0 of the second operand, which is poison
; here, so lane 0 carries no defined value; lanes 1-3 take element 0 of %vec0.
; Both expected stores reuse v[0:3].
; The check lines below are autogenerated; regenerate them instead of hand-editing.
1485 define void @v_shuffle_v4i64_v3i64__3_0_0_0(ptr addrspace(1) inreg %ptr) {
1486 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__3_0_0_0:
1488 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1489 ; GFX900-NEXT: ;;#ASMSTART
1490 ; GFX900-NEXT: ; def v[0:5]
1491 ; GFX900-NEXT: ;;#ASMEND
1492 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
1493 ; GFX900-NEXT: v_mov_b32_e32 v2, v0
1494 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
1495 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1496 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
1497 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1498 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1500 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__3_0_0_0:
1502 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1503 ; GFX90A-NEXT: ;;#ASMSTART
1504 ; GFX90A-NEXT: ; def v[0:5]
1505 ; GFX90A-NEXT: ;;#ASMEND
1506 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
1507 ; GFX90A-NEXT: v_mov_b32_e32 v2, v0
1508 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
1509 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1510 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
1511 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1512 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1514 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_0_0_0:
1516 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1517 ; GFX940-NEXT: ;;#ASMSTART
1518 ; GFX940-NEXT: ; def v[0:5]
1519 ; GFX940-NEXT: ;;#ASMEND
1520 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
1521 ; GFX940-NEXT: v_mov_b32_e32 v2, v0
1522 ; GFX940-NEXT: v_mov_b32_e32 v3, v1
1523 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
1524 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
1525 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1526 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Result lanes: { poison-operand[0], vec0[0], vec0[0], vec0[0] }; the 32-byte
; store is expected to be emitted as two dwordx4 stores.
1527 %vec0 = call <3 x i64> asm "; def $0", "=v"()
1528 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
1529 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two asm-defined <3 x i64> vectors into a <4 x i64> using mask
; <4, 0, 0, 0>: lane 0 takes element 1 of %vec1 and lanes 1-3 take element 0
; of %vec0. Both inputs are used, so the expected output shows two asm defs
; (v[0:5] and v[2:7]).
; The check lines below are autogenerated; regenerate them instead of hand-editing.
1533 define void @v_shuffle_v4i64_v3i64__4_0_0_0(ptr addrspace(1) inreg %ptr) {
1534 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_0_0_0:
1536 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1537 ; GFX900-NEXT: ;;#ASMSTART
1538 ; GFX900-NEXT: ; def v[0:5]
1539 ; GFX900-NEXT: ;;#ASMEND
1540 ; GFX900-NEXT: ;;#ASMSTART
1541 ; GFX900-NEXT: ; def v[2:7]
1542 ; GFX900-NEXT: ;;#ASMEND
1543 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
1544 ; GFX900-NEXT: v_mov_b32_e32 v2, v0
1545 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
1546 ; GFX900-NEXT: v_mov_b32_e32 v6, v0
1547 ; GFX900-NEXT: v_mov_b32_e32 v7, v1
1548 ; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
1549 ; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
1550 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1551 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1553 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_0_0_0:
1555 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1556 ; GFX90A-NEXT: ;;#ASMSTART
1557 ; GFX90A-NEXT: ; def v[0:5]
1558 ; GFX90A-NEXT: ;;#ASMEND
1559 ; GFX90A-NEXT: ;;#ASMSTART
1560 ; GFX90A-NEXT: ; def v[2:7]
1561 ; GFX90A-NEXT: ;;#ASMEND
1562 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
1563 ; GFX90A-NEXT: v_mov_b32_e32 v2, v0
1564 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
1565 ; GFX90A-NEXT: v_mov_b32_e32 v6, v0
1566 ; GFX90A-NEXT: v_mov_b32_e32 v7, v1
1567 ; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
1568 ; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
1569 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1570 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1572 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_0_0_0:
1574 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1575 ; GFX940-NEXT: ;;#ASMSTART
1576 ; GFX940-NEXT: ; def v[0:5]
1577 ; GFX940-NEXT: ;;#ASMEND
1578 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
1579 ; GFX940-NEXT: ;;#ASMSTART
1580 ; GFX940-NEXT: ; def v[2:7]
1581 ; GFX940-NEXT: ;;#ASMEND
1582 ; GFX940-NEXT: s_nop 0
1583 ; GFX940-NEXT: v_mov_b32_e32 v2, v0
1584 ; GFX940-NEXT: v_mov_b32_e32 v3, v1
1585 ; GFX940-NEXT: v_mov_b32_e32 v6, v0
1586 ; GFX940-NEXT: v_mov_b32_e32 v7, v1
1587 ; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
1588 ; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
1589 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1590 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Result lanes: { vec1[1], vec0[0], vec0[0], vec0[0] }; the 32-byte store is
; expected to be emitted as two dwordx4 stores (offset 16 and offset 0).
1591 %vec0 = call <3 x i64> asm "; def $0", "=v"()
1592 %vec1 = call <3 x i64> asm "; def $0", "=v"()
1593 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
1594 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two asm-defined <3 x i64> vectors into a <4 x i64> using mask
; <5, 0, 0, 0>: lane 0 takes element 2 of %vec1 and lanes 1-3 take element 0
; of %vec0. Both inputs are used, so the expected output shows two asm defs
; (v[0:5] and v[2:7]).
; The check lines below are autogenerated; regenerate them instead of hand-editing.
1598 define void @v_shuffle_v4i64_v3i64__5_0_0_0(ptr addrspace(1) inreg %ptr) {
1599 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_0_0:
1601 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1602 ; GFX900-NEXT: ;;#ASMSTART
1603 ; GFX900-NEXT: ; def v[0:5]
1604 ; GFX900-NEXT: ;;#ASMEND
1605 ; GFX900-NEXT: ;;#ASMSTART
1606 ; GFX900-NEXT: ; def v[2:7]
1607 ; GFX900-NEXT: ;;#ASMEND
1608 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
1609 ; GFX900-NEXT: v_mov_b32_e32 v2, v0
1610 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
1611 ; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
1612 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
1613 ; GFX900-NEXT: v_mov_b32_e32 v2, v6
1614 ; GFX900-NEXT: v_mov_b32_e32 v3, v7
1615 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
1616 ; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
1617 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1618 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1620 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_0_0:
1622 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1623 ; GFX90A-NEXT: ;;#ASMSTART
1624 ; GFX90A-NEXT: ; def v[0:5]
1625 ; GFX90A-NEXT: ;;#ASMEND
1626 ; GFX90A-NEXT: ;;#ASMSTART
1627 ; GFX90A-NEXT: ; def v[2:7]
1628 ; GFX90A-NEXT: ;;#ASMEND
1629 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
1630 ; GFX90A-NEXT: v_mov_b32_e32 v2, v0
1631 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
1632 ; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
1633 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
1634 ; GFX90A-NEXT: v_mov_b32_e32 v2, v6
1635 ; GFX90A-NEXT: v_mov_b32_e32 v3, v7
1636 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
1637 ; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
1638 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1639 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1641 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_0_0:
1643 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1644 ; GFX940-NEXT: ;;#ASMSTART
1645 ; GFX940-NEXT: ; def v[0:5]
1646 ; GFX940-NEXT: ;;#ASMEND
1647 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
1648 ; GFX940-NEXT: ;;#ASMSTART
1649 ; GFX940-NEXT: ; def v[2:7]
1650 ; GFX940-NEXT: ;;#ASMEND
1651 ; GFX940-NEXT: s_nop 0
1652 ; GFX940-NEXT: v_mov_b32_e32 v2, v0
1653 ; GFX940-NEXT: v_mov_b32_e32 v3, v1
1654 ; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
1655 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
1656 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
1657 ; GFX940-NEXT: v_mov_b32_e32 v2, v6
1658 ; GFX940-NEXT: v_mov_b32_e32 v3, v7
1659 ; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
1660 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1661 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Result lanes: { vec1[2], vec0[0], vec0[0], vec0[0] }; the 32-byte store is
; expected to be emitted as two dwordx4 stores (offset 16 and offset 0).
1662 %vec0 = call <3 x i64> asm "; def $0", "=v"()
1663 %vec1 = call <3 x i64> asm "; def $0", "=v"()
1664 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
1665 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, poison, 0, 0> on two <3 x i64> inputs (mask indices 0-2
; select from %vec0, 3-5 from %vec1): result element 0 is %vec1[2], element 1
; is left unspecified (poison), and elements 2-3 are %vec0[0]. Both inputs are
; produced by inline asm and the <4 x i64> result is stored to %ptr.
1669 define void @v_shuffle_v4i64_v3i64__5_u_0_0(ptr addrspace(1) inreg %ptr) {
1670 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_0_0:
1672 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1673 ; GFX900-NEXT: ;;#ASMSTART
1674 ; GFX900-NEXT: ; def v[0:5]
1675 ; GFX900-NEXT: ;;#ASMEND
1676 ; GFX900-NEXT: ;;#ASMSTART
1677 ; GFX900-NEXT: ; def v[2:7]
1678 ; GFX900-NEXT: ;;#ASMEND
1679 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
1680 ; GFX900-NEXT: v_mov_b32_e32 v2, v0
1681 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
1682 ; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
1683 ; GFX900-NEXT: s_nop 0
1684 ; GFX900-NEXT: v_mov_b32_e32 v0, v6
1685 ; GFX900-NEXT: v_mov_b32_e32 v1, v7
1686 ; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
1687 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1688 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1690 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_0_0:
1692 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1693 ; GFX90A-NEXT: ;;#ASMSTART
1694 ; GFX90A-NEXT: ; def v[0:5]
1695 ; GFX90A-NEXT: ;;#ASMEND
1696 ; GFX90A-NEXT: ;;#ASMSTART
1697 ; GFX90A-NEXT: ; def v[2:7]
1698 ; GFX90A-NEXT: ;;#ASMEND
1699 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
1700 ; GFX90A-NEXT: v_mov_b32_e32 v2, v0
1701 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
1702 ; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
1703 ; GFX90A-NEXT: s_nop 0
1704 ; GFX90A-NEXT: v_mov_b32_e32 v0, v6
1705 ; GFX90A-NEXT: v_mov_b32_e32 v1, v7
1706 ; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
1707 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1708 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1710 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_0_0:
1712 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1713 ; GFX940-NEXT: ;;#ASMSTART
1714 ; GFX940-NEXT: ; def v[0:5]
1715 ; GFX940-NEXT: ;;#ASMEND
1716 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
1717 ; GFX940-NEXT: ;;#ASMSTART
1718 ; GFX940-NEXT: ; def v[2:7]
1719 ; GFX940-NEXT: ;;#ASMEND
1720 ; GFX940-NEXT: s_nop 0
1721 ; GFX940-NEXT: v_mov_b32_e32 v2, v0
1722 ; GFX940-NEXT: v_mov_b32_e32 v3, v1
1723 ; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
1724 ; GFX940-NEXT: s_nop 1
1725 ; GFX940-NEXT: v_mov_b32_e32 v0, v6
1726 ; GFX940-NEXT: v_mov_b32_e32 v1, v7
1727 ; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1
1728 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1729 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1730 %vec0 = call <3 x i64> asm "; def $0", "=v"()
1731 %vec1 = call <3 x i64> asm "; def $0", "=v"()
1732 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 0, i32 0>
1733 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 1, 0, 0> on two <3 x i64> inputs (mask indices 0-2 select
; from %vec0, 3-5 from %vec1): result element 0 is %vec1[2], element 1 is
; %vec0[1], and elements 2-3 are %vec0[0]. Both inputs are produced by inline
; asm and the <4 x i64> result is stored to %ptr.
1737 define void @v_shuffle_v4i64_v3i64__5_1_0_0(ptr addrspace(1) inreg %ptr) {
1738 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_0_0:
1740 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1741 ; GFX900-NEXT: ;;#ASMSTART
1742 ; GFX900-NEXT: ; def v[0:5]
1743 ; GFX900-NEXT: ;;#ASMEND
1744 ; GFX900-NEXT: ;;#ASMSTART
1745 ; GFX900-NEXT: ; def v[4:9]
1746 ; GFX900-NEXT: ;;#ASMEND
1747 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
1748 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
1749 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
1750 ; GFX900-NEXT: v_mov_b32_e32 v6, v0
1751 ; GFX900-NEXT: v_mov_b32_e32 v7, v1
1752 ; GFX900-NEXT: v_mov_b32_e32 v0, v8
1753 ; GFX900-NEXT: v_mov_b32_e32 v1, v9
1754 ; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
1755 ; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
1756 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1757 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1759 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_0_0:
1761 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1762 ; GFX90A-NEXT: ;;#ASMSTART
1763 ; GFX90A-NEXT: ; def v[0:5]
1764 ; GFX90A-NEXT: ;;#ASMEND
1765 ; GFX90A-NEXT: ;;#ASMSTART
1766 ; GFX90A-NEXT: ; def v[4:9]
1767 ; GFX90A-NEXT: ;;#ASMEND
1768 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
1769 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
1770 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
1771 ; GFX90A-NEXT: v_mov_b32_e32 v6, v0
1772 ; GFX90A-NEXT: v_mov_b32_e32 v7, v1
1773 ; GFX90A-NEXT: v_mov_b32_e32 v0, v8
1774 ; GFX90A-NEXT: v_mov_b32_e32 v1, v9
1775 ; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
1776 ; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
1777 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1778 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1780 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_0_0:
1782 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1783 ; GFX940-NEXT: ;;#ASMSTART
1784 ; GFX940-NEXT: ; def v[0:5]
1785 ; GFX940-NEXT: ;;#ASMEND
1786 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
1787 ; GFX940-NEXT: ;;#ASMSTART
1788 ; GFX940-NEXT: ; def v[4:9]
1789 ; GFX940-NEXT: ;;#ASMEND
1790 ; GFX940-NEXT: s_nop 0
1791 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
1792 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
1793 ; GFX940-NEXT: v_mov_b32_e32 v6, v0
1794 ; GFX940-NEXT: v_mov_b32_e32 v7, v1
1795 ; GFX940-NEXT: v_mov_b32_e32 v0, v8
1796 ; GFX940-NEXT: v_mov_b32_e32 v1, v9
1797 ; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1
1798 ; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
1799 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1800 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1801 %vec0 = call <3 x i64> asm "; def $0", "=v"()
1802 %vec1 = call <3 x i64> asm "; def $0", "=v"()
1803 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 0, i32 0>
1804 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 2, 0, 0> on two <3 x i64> inputs (mask indices 0-2 select
; from %vec0, 3-5 from %vec1): result element 0 is %vec1[2], element 1 is
; %vec0[2], and elements 2-3 are %vec0[0]. Both inputs are produced by inline
; asm and the <4 x i64> result is stored to %ptr.
1808 define void @v_shuffle_v4i64_v3i64__5_2_0_0(ptr addrspace(1) inreg %ptr) {
1809 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_0_0:
1811 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1812 ; GFX900-NEXT: ;;#ASMSTART
1813 ; GFX900-NEXT: ; def v[0:5]
1814 ; GFX900-NEXT: ;;#ASMEND
1815 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
1816 ; GFX900-NEXT: v_mov_b32_e32 v2, v0
1817 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
1818 ; GFX900-NEXT: ;;#ASMSTART
1819 ; GFX900-NEXT: ; def v[6:11]
1820 ; GFX900-NEXT: ;;#ASMEND
1821 ; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
1822 ; GFX900-NEXT: s_nop 0
1823 ; GFX900-NEXT: v_mov_b32_e32 v2, v10
1824 ; GFX900-NEXT: v_mov_b32_e32 v3, v11
1825 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
1826 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1827 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1829 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_0_0:
1831 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1832 ; GFX90A-NEXT: ;;#ASMSTART
1833 ; GFX90A-NEXT: ; def v[0:5]
1834 ; GFX90A-NEXT: ;;#ASMEND
1835 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
1836 ; GFX90A-NEXT: v_mov_b32_e32 v2, v0
1837 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
1838 ; GFX90A-NEXT: ;;#ASMSTART
1839 ; GFX90A-NEXT: ; def v[6:11]
1840 ; GFX90A-NEXT: ;;#ASMEND
1841 ; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
1842 ; GFX90A-NEXT: s_nop 0
1843 ; GFX90A-NEXT: v_mov_b32_e32 v2, v10
1844 ; GFX90A-NEXT: v_mov_b32_e32 v3, v11
1845 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
1846 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1847 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1849 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_0_0:
1851 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1852 ; GFX940-NEXT: ;;#ASMSTART
1853 ; GFX940-NEXT: ; def v[0:5]
1854 ; GFX940-NEXT: ;;#ASMEND
1855 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
1856 ; GFX940-NEXT: v_mov_b32_e32 v2, v0
1857 ; GFX940-NEXT: v_mov_b32_e32 v3, v1
1858 ; GFX940-NEXT: ;;#ASMSTART
1859 ; GFX940-NEXT: ; def v[6:11]
1860 ; GFX940-NEXT: ;;#ASMEND
1861 ; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1
1862 ; GFX940-NEXT: s_nop 1
1863 ; GFX940-NEXT: v_mov_b32_e32 v2, v10
1864 ; GFX940-NEXT: v_mov_b32_e32 v3, v11
1865 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
1866 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1867 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1868 %vec0 = call <3 x i64> asm "; def $0", "=v"()
1869 %vec1 = call <3 x i64> asm "; def $0", "=v"()
1870 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 0, i32 0>
1871 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 3, 0, 0> on two <3 x i64> inputs (mask indices 0-2 select
; from %vec0, 3-5 from %vec1): result element 0 is %vec1[2], element 1 is
; %vec1[0], and elements 2-3 are %vec0[0]. Both inputs are produced by inline
; asm and the <4 x i64> result is stored to %ptr.
1875 define void @v_shuffle_v4i64_v3i64__5_3_0_0(ptr addrspace(1) inreg %ptr) {
1876 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_0_0:
1878 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1879 ; GFX900-NEXT: ;;#ASMSTART
1880 ; GFX900-NEXT: ; def v[0:5]
1881 ; GFX900-NEXT: ;;#ASMEND
1882 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
1883 ; GFX900-NEXT: v_mov_b32_e32 v2, v0
1884 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
1885 ; GFX900-NEXT: ;;#ASMSTART
1886 ; GFX900-NEXT: ; def v[4:9]
1887 ; GFX900-NEXT: ;;#ASMEND
1888 ; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
1889 ; GFX900-NEXT: s_nop 0
1890 ; GFX900-NEXT: v_mov_b32_e32 v0, v8
1891 ; GFX900-NEXT: v_mov_b32_e32 v1, v9
1892 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
1893 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
1894 ; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
1895 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1896 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1898 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_0_0:
1900 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1901 ; GFX90A-NEXT: ;;#ASMSTART
1902 ; GFX90A-NEXT: ; def v[0:5]
1903 ; GFX90A-NEXT: ;;#ASMEND
1904 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
1905 ; GFX90A-NEXT: v_mov_b32_e32 v2, v0
1906 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
1907 ; GFX90A-NEXT: ;;#ASMSTART
1908 ; GFX90A-NEXT: ; def v[4:9]
1909 ; GFX90A-NEXT: ;;#ASMEND
1910 ; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
1911 ; GFX90A-NEXT: s_nop 0
1912 ; GFX90A-NEXT: v_mov_b32_e32 v0, v8
1913 ; GFX90A-NEXT: v_mov_b32_e32 v1, v9
1914 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
1915 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
1916 ; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
1917 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1918 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1920 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_0_0:
1922 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1923 ; GFX940-NEXT: ;;#ASMSTART
1924 ; GFX940-NEXT: ; def v[0:5]
1925 ; GFX940-NEXT: ;;#ASMEND
1926 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
1927 ; GFX940-NEXT: v_mov_b32_e32 v2, v0
1928 ; GFX940-NEXT: v_mov_b32_e32 v3, v1
1929 ; GFX940-NEXT: ;;#ASMSTART
1930 ; GFX940-NEXT: ; def v[4:9]
1931 ; GFX940-NEXT: ;;#ASMEND
1932 ; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1
1933 ; GFX940-NEXT: s_nop 1
1934 ; GFX940-NEXT: v_mov_b32_e32 v0, v8
1935 ; GFX940-NEXT: v_mov_b32_e32 v1, v9
1936 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
1937 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
1938 ; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
1939 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1940 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1941 %vec0 = call <3 x i64> asm "; def $0", "=v"()
1942 %vec1 = call <3 x i64> asm "; def $0", "=v"()
1943 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 0, i32 0>
1944 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 4, 0, 0> on two <3 x i64> inputs (mask indices 0-2 select
; from %vec0, 3-5 from %vec1): result element 0 is %vec1[2], element 1 is
; %vec1[1], and elements 2-3 are %vec0[0]. Both inputs are produced by inline
; asm and the <4 x i64> result is stored to %ptr.
1948 define void @v_shuffle_v4i64_v3i64__5_4_0_0(ptr addrspace(1) inreg %ptr) {
1949 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_0_0:
1951 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1952 ; GFX900-NEXT: ;;#ASMSTART
1953 ; GFX900-NEXT: ; def v[0:5]
1954 ; GFX900-NEXT: ;;#ASMEND
1955 ; GFX900-NEXT: ;;#ASMSTART
1956 ; GFX900-NEXT: ; def v[2:7]
1957 ; GFX900-NEXT: ;;#ASMEND
1958 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
1959 ; GFX900-NEXT: v_mov_b32_e32 v2, v0
1960 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
1961 ; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
1962 ; GFX900-NEXT: s_nop 0
1963 ; GFX900-NEXT: v_mov_b32_e32 v2, v6
1964 ; GFX900-NEXT: v_mov_b32_e32 v3, v7
1965 ; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
1966 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1967 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1969 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_0_0:
1971 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1972 ; GFX90A-NEXT: ;;#ASMSTART
1973 ; GFX90A-NEXT: ; def v[0:5]
1974 ; GFX90A-NEXT: ;;#ASMEND
1975 ; GFX90A-NEXT: ;;#ASMSTART
1976 ; GFX90A-NEXT: ; def v[2:7]
1977 ; GFX90A-NEXT: ;;#ASMEND
1978 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
1979 ; GFX90A-NEXT: v_mov_b32_e32 v2, v0
1980 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
1981 ; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
1982 ; GFX90A-NEXT: s_nop 0
1983 ; GFX90A-NEXT: v_mov_b32_e32 v2, v6
1984 ; GFX90A-NEXT: v_mov_b32_e32 v3, v7
1985 ; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
1986 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1987 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1989 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_0_0:
1991 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1992 ; GFX940-NEXT: ;;#ASMSTART
1993 ; GFX940-NEXT: ; def v[0:5]
1994 ; GFX940-NEXT: ;;#ASMEND
1995 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
1996 ; GFX940-NEXT: ;;#ASMSTART
1997 ; GFX940-NEXT: ; def v[2:7]
1998 ; GFX940-NEXT: ;;#ASMEND
1999 ; GFX940-NEXT: s_nop 0
2000 ; GFX940-NEXT: v_mov_b32_e32 v2, v0
2001 ; GFX940-NEXT: v_mov_b32_e32 v3, v1
2002 ; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
2003 ; GFX940-NEXT: s_nop 1
2004 ; GFX940-NEXT: v_mov_b32_e32 v2, v6
2005 ; GFX940-NEXT: v_mov_b32_e32 v3, v7
2006 ; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
2007 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2008 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2009 %vec0 = call <3 x i64> asm "; def $0", "=v"()
2010 %vec1 = call <3 x i64> asm "; def $0", "=v"()
2011 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 0, i32 0>
2012 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 5, 0, 0> on two <3 x i64> inputs (mask indices 0-2 select
; from %vec0, 3-5 from %vec1): result elements 0-1 are both %vec1[2] and
; elements 2-3 are both %vec0[0]. Both inputs are produced by inline asm and
; the <4 x i64> result is stored to %ptr.
2016 define void @v_shuffle_v4i64_v3i64__5_5_0_0(ptr addrspace(1) inreg %ptr) {
2017 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_0:
2019 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2020 ; GFX900-NEXT: ;;#ASMSTART
2021 ; GFX900-NEXT: ; def v[0:5]
2022 ; GFX900-NEXT: ;;#ASMEND
2023 ; GFX900-NEXT: ;;#ASMSTART
2024 ; GFX900-NEXT: ; def v[2:7]
2025 ; GFX900-NEXT: ;;#ASMEND
2026 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
2027 ; GFX900-NEXT: v_mov_b32_e32 v2, v0
2028 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
2029 ; GFX900-NEXT: v_mov_b32_e32 v4, v6
2030 ; GFX900-NEXT: v_mov_b32_e32 v5, v7
2031 ; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
2032 ; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
2033 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2034 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2036 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_0:
2038 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2039 ; GFX90A-NEXT: ;;#ASMSTART
2040 ; GFX90A-NEXT: ; def v[0:5]
2041 ; GFX90A-NEXT: ;;#ASMEND
2042 ; GFX90A-NEXT: ;;#ASMSTART
2043 ; GFX90A-NEXT: ; def v[2:7]
2044 ; GFX90A-NEXT: ;;#ASMEND
2045 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
2046 ; GFX90A-NEXT: v_mov_b32_e32 v2, v0
2047 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
2048 ; GFX90A-NEXT: v_mov_b32_e32 v4, v6
2049 ; GFX90A-NEXT: v_mov_b32_e32 v5, v7
2050 ; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
2051 ; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
2052 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2053 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2055 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_0:
2057 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2058 ; GFX940-NEXT: ;;#ASMSTART
2059 ; GFX940-NEXT: ; def v[0:5]
2060 ; GFX940-NEXT: ;;#ASMEND
2061 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
2062 ; GFX940-NEXT: ;;#ASMSTART
2063 ; GFX940-NEXT: ; def v[2:7]
2064 ; GFX940-NEXT: ;;#ASMEND
2065 ; GFX940-NEXT: s_nop 0
2066 ; GFX940-NEXT: v_mov_b32_e32 v2, v0
2067 ; GFX940-NEXT: v_mov_b32_e32 v3, v1
2068 ; GFX940-NEXT: v_mov_b32_e32 v4, v6
2069 ; GFX940-NEXT: v_mov_b32_e32 v5, v7
2070 ; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
2071 ; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
2072 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2073 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2074 %vec0 = call <3 x i64> asm "; def $0", "=v"()
2075 %vec1 = call <3 x i64> asm "; def $0", "=v"()
2076 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 0>
2077 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 5, poison, 0> on two <3 x i64> inputs (mask indices 0-2
; select from %vec0, 3-5 from %vec1): result elements 0-1 are both %vec1[2],
; element 2 is left unspecified (poison), and element 3 is %vec0[0]. Both
; inputs are produced by inline asm and the <4 x i64> result is stored to %ptr.
2081 define void @v_shuffle_v4i64_v3i64__5_5_u_0(ptr addrspace(1) inreg %ptr) {
2082 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_0:
2084 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2085 ; GFX900-NEXT: ;;#ASMSTART
2086 ; GFX900-NEXT: ; def v[0:5]
2087 ; GFX900-NEXT: ;;#ASMEND
2088 ; GFX900-NEXT: ;;#ASMSTART
2089 ; GFX900-NEXT: ; def v[2:7]
2090 ; GFX900-NEXT: ;;#ASMEND
2091 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
2092 ; GFX900-NEXT: v_mov_b32_e32 v2, v0
2093 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
2094 ; GFX900-NEXT: v_mov_b32_e32 v4, v6
2095 ; GFX900-NEXT: v_mov_b32_e32 v5, v7
2096 ; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
2097 ; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
2098 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2099 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2101 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_0:
2103 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2104 ; GFX90A-NEXT: ;;#ASMSTART
2105 ; GFX90A-NEXT: ; def v[0:5]
2106 ; GFX90A-NEXT: ;;#ASMEND
2107 ; GFX90A-NEXT: ;;#ASMSTART
2108 ; GFX90A-NEXT: ; def v[2:7]
2109 ; GFX90A-NEXT: ;;#ASMEND
2110 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
2111 ; GFX90A-NEXT: v_mov_b32_e32 v2, v0
2112 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
2113 ; GFX90A-NEXT: v_mov_b32_e32 v4, v6
2114 ; GFX90A-NEXT: v_mov_b32_e32 v5, v7
2115 ; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
2116 ; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
2117 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2118 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2120 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_0:
2122 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2123 ; GFX940-NEXT: ;;#ASMSTART
2124 ; GFX940-NEXT: ; def v[0:5]
2125 ; GFX940-NEXT: ;;#ASMEND
2126 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
2127 ; GFX940-NEXT: ;;#ASMSTART
2128 ; GFX940-NEXT: ; def v[2:7]
2129 ; GFX940-NEXT: ;;#ASMEND
2130 ; GFX940-NEXT: s_nop 0
2131 ; GFX940-NEXT: v_mov_b32_e32 v2, v0
2132 ; GFX940-NEXT: v_mov_b32_e32 v3, v1
2133 ; GFX940-NEXT: v_mov_b32_e32 v4, v6
2134 ; GFX940-NEXT: v_mov_b32_e32 v5, v7
2135 ; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
2136 ; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
2137 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2138 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2139 %vec0 = call <3 x i64> asm "; def $0", "=v"()
2140 %vec1 = call <3 x i64> asm "; def $0", "=v"()
2141 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 0>
2142 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 5, 1, 0> on two <3 x i64> inputs (mask indices 0-2 select
; from %vec0, 3-5 from %vec1): result elements 0-1 are both %vec1[2], element
; 2 is %vec0[1], and element 3 is %vec0[0]. Both inputs are produced by inline
; asm and the <4 x i64> result is stored to %ptr.
2146 define void @v_shuffle_v4i64_v3i64__5_5_1_0(ptr addrspace(1) inreg %ptr) {
2147 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_0:
2149 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2150 ; GFX900-NEXT: ;;#ASMSTART
2151 ; GFX900-NEXT: ; def v[0:5]
2152 ; GFX900-NEXT: ;;#ASMEND
2153 ; GFX900-NEXT: ;;#ASMSTART
2154 ; GFX900-NEXT: ; def v[4:9]
2155 ; GFX900-NEXT: ;;#ASMEND
2156 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
2157 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
2158 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
2159 ; GFX900-NEXT: v_mov_b32_e32 v6, v8
2160 ; GFX900-NEXT: v_mov_b32_e32 v7, v9
2161 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2162 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
2163 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2164 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2166 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_0:
2168 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2169 ; GFX90A-NEXT: ;;#ASMSTART
2170 ; GFX90A-NEXT: ; def v[0:5]
2171 ; GFX90A-NEXT: ;;#ASMEND
2172 ; GFX90A-NEXT: ;;#ASMSTART
2173 ; GFX90A-NEXT: ; def v[4:9]
2174 ; GFX90A-NEXT: ;;#ASMEND
2175 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
2176 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
2177 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
2178 ; GFX90A-NEXT: v_mov_b32_e32 v6, v8
2179 ; GFX90A-NEXT: v_mov_b32_e32 v7, v9
2180 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2181 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
2182 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2183 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2185 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_0:
2187 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2188 ; GFX940-NEXT: ;;#ASMSTART
2189 ; GFX940-NEXT: ; def v[0:5]
2190 ; GFX940-NEXT: ;;#ASMEND
2191 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
2192 ; GFX940-NEXT: ;;#ASMSTART
2193 ; GFX940-NEXT: ; def v[4:9]
2194 ; GFX940-NEXT: ;;#ASMEND
2195 ; GFX940-NEXT: s_nop 0
2196 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
2197 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
2198 ; GFX940-NEXT: v_mov_b32_e32 v6, v8
2199 ; GFX940-NEXT: v_mov_b32_e32 v7, v9
2200 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
2201 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
2202 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2203 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2204 %vec0 = call <3 x i64> asm "; def $0", "=v"()
2205 %vec1 = call <3 x i64> asm "; def $0", "=v"()
2206 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 0>
2207 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 5, 2, 0> on two <3 x i64> inputs (mask indices 0-2 select
; from %vec0, 3-5 from %vec1): result elements 0-1 are both %vec1[2], element
; 2 is %vec0[2], and element 3 is %vec0[0]. Both inputs are produced by inline
; asm and the <4 x i64> result is stored to %ptr.
2211 define void @v_shuffle_v4i64_v3i64__5_5_2_0(ptr addrspace(1) inreg %ptr) {
2212 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_0:
2214 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2215 ; GFX900-NEXT: ;;#ASMSTART
2216 ; GFX900-NEXT: ; def v[0:5]
2217 ; GFX900-NEXT: ;;#ASMEND
2218 ; GFX900-NEXT: ;;#ASMSTART
2219 ; GFX900-NEXT: ; def v[6:11]
2220 ; GFX900-NEXT: ;;#ASMEND
2221 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
2222 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
2223 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
2224 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
2225 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
2226 ; GFX900-NEXT: v_mov_b32_e32 v8, v10
2227 ; GFX900-NEXT: v_mov_b32_e32 v9, v11
2228 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
2229 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
2230 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2231 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2233 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_0:
2235 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2236 ; GFX90A-NEXT: ;;#ASMSTART
2237 ; GFX90A-NEXT: ; def v[0:5]
2238 ; GFX90A-NEXT: ;;#ASMEND
2239 ; GFX90A-NEXT: ;;#ASMSTART
2240 ; GFX90A-NEXT: ; def v[6:11]
2241 ; GFX90A-NEXT: ;;#ASMEND
2242 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
2243 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
2244 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
2245 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
2246 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
2247 ; GFX90A-NEXT: v_mov_b32_e32 v8, v10
2248 ; GFX90A-NEXT: v_mov_b32_e32 v9, v11
2249 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
2250 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
2251 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2252 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2254 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_0:
2256 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2257 ; GFX940-NEXT: ;;#ASMSTART
2258 ; GFX940-NEXT: ; def v[0:5]
2259 ; GFX940-NEXT: ;;#ASMEND
2260 ; GFX940-NEXT: ;;#ASMSTART
2261 ; GFX940-NEXT: ; def v[6:11]
2262 ; GFX940-NEXT: ;;#ASMEND
2263 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
2264 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
2265 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
2266 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
2267 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
2268 ; GFX940-NEXT: v_mov_b32_e32 v8, v10
2269 ; GFX940-NEXT: v_mov_b32_e32 v9, v11
2270 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
2271 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
2272 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2273 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2274 %vec0 = call <3 x i64> asm "; def $0", "=v"()
2275 %vec1 = call <3 x i64> asm "; def $0", "=v"()
2276 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 0>
2277 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 5, 3, 0> on two <3 x i64> inputs (mask indices 0-2 select
; from %vec0, 3-5 from %vec1): result elements 0-1 are both %vec1[2], element
; 2 is %vec1[0], and element 3 is %vec0[0]. Both inputs are produced by inline
; asm and the <4 x i64> result is stored to %ptr.
2281 define void @v_shuffle_v4i64_v3i64__5_5_3_0(ptr addrspace(1) inreg %ptr) {
2282 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_0:
2284 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2285 ; GFX900-NEXT: ;;#ASMSTART
2286 ; GFX900-NEXT: ; def v[0:5]
2287 ; GFX900-NEXT: ;;#ASMEND
2288 ; GFX900-NEXT: ;;#ASMSTART
2289 ; GFX900-NEXT: ; def v[2:7]
2290 ; GFX900-NEXT: ;;#ASMEND
2291 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
2292 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
2293 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
2294 ; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
2295 ; GFX900-NEXT: s_nop 0
2296 ; GFX900-NEXT: v_mov_b32_e32 v4, v6
2297 ; GFX900-NEXT: v_mov_b32_e32 v5, v7
2298 ; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
2299 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2300 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2302 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_0:
2304 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2305 ; GFX90A-NEXT: ;;#ASMSTART
2306 ; GFX90A-NEXT: ; def v[0:5]
2307 ; GFX90A-NEXT: ;;#ASMEND
2308 ; GFX90A-NEXT: ;;#ASMSTART
2309 ; GFX90A-NEXT: ; def v[2:7]
2310 ; GFX90A-NEXT: ;;#ASMEND
2311 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
2312 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
2313 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
2314 ; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
2315 ; GFX90A-NEXT: s_nop 0
2316 ; GFX90A-NEXT: v_mov_b32_e32 v4, v6
2317 ; GFX90A-NEXT: v_mov_b32_e32 v5, v7
2318 ; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
2319 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2320 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2322 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_0:
2324 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2325 ; GFX940-NEXT: ;;#ASMSTART
2326 ; GFX940-NEXT: ; def v[0:5]
2327 ; GFX940-NEXT: ;;#ASMEND
2328 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
2329 ; GFX940-NEXT: ;;#ASMSTART
2330 ; GFX940-NEXT: ; def v[2:7]
2331 ; GFX940-NEXT: ;;#ASMEND
2332 ; GFX940-NEXT: s_nop 0
2333 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
2334 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
2335 ; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1
2336 ; GFX940-NEXT: s_nop 1
2337 ; GFX940-NEXT: v_mov_b32_e32 v4, v6
2338 ; GFX940-NEXT: v_mov_b32_e32 v5, v7
2339 ; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
2340 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2341 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2342 %vec0 = call <3 x i64> asm "; def $0", "=v"()
2343 %vec1 = call <3 x i64> asm "; def $0", "=v"()
2344 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 0>
2345 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 5, 4, 0> on two <3 x i64> inputs (mask indices 0-2 select
; from %vec0, 3-5 from %vec1): result elements 0-1 are both %vec1[2], element
; 2 is %vec1[1], and element 3 is %vec0[0]. Both inputs are produced by inline
; asm and the <4 x i64> result is stored to %ptr.
2349 define void @v_shuffle_v4i64_v3i64__5_5_4_0(ptr addrspace(1) inreg %ptr) {
2350 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_0:
2352 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2353 ; GFX900-NEXT: ;;#ASMSTART
2354 ; GFX900-NEXT: ; def v[0:5]
2355 ; GFX900-NEXT: ;;#ASMEND
2356 ; GFX900-NEXT: ;;#ASMSTART
2357 ; GFX900-NEXT: ; def v[2:7]
2358 ; GFX900-NEXT: ;;#ASMEND
2359 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
2360 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
2361 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
2362 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
2363 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
2364 ; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
2365 ; GFX900-NEXT: s_nop 0
2366 ; GFX900-NEXT: v_mov_b32_e32 v4, v6
2367 ; GFX900-NEXT: v_mov_b32_e32 v5, v7
2368 ; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
2369 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2370 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2372 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_0:
2374 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2375 ; GFX90A-NEXT: ;;#ASMSTART
2376 ; GFX90A-NEXT: ; def v[0:5]
2377 ; GFX90A-NEXT: ;;#ASMEND
2378 ; GFX90A-NEXT: ;;#ASMSTART
2379 ; GFX90A-NEXT: ; def v[2:7]
2380 ; GFX90A-NEXT: ;;#ASMEND
2381 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
2382 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
2383 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
2384 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
2385 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
2386 ; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
2387 ; GFX90A-NEXT: s_nop 0
2388 ; GFX90A-NEXT: v_mov_b32_e32 v4, v6
2389 ; GFX90A-NEXT: v_mov_b32_e32 v5, v7
2390 ; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
2391 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2392 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2394 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_0:
2396 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2397 ; GFX940-NEXT: ;;#ASMSTART
2398 ; GFX940-NEXT: ; def v[0:5]
2399 ; GFX940-NEXT: ;;#ASMEND
2400 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
2401 ; GFX940-NEXT: ;;#ASMSTART
2402 ; GFX940-NEXT: ; def v[2:7]
2403 ; GFX940-NEXT: ;;#ASMEND
2404 ; GFX940-NEXT: s_nop 0
2405 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
2406 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
2407 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
2408 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
2409 ; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1
2410 ; GFX940-NEXT: s_nop 1
2411 ; GFX940-NEXT: v_mov_b32_e32 v4, v6
2412 ; GFX940-NEXT: v_mov_b32_e32 v5, v7
2413 ; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
2414 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2415 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2416 %vec0 = call <3 x i64> asm "; def $0", "=v"()
2417 %vec1 = call <3 x i64> asm "; def $0", "=v"()
2418 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 0>
2419 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <poison, 1, 1, 1> with a single live <3 x i64> input (the
; second shuffle operand is poison): result element 0 is left unspecified and
; elements 1-3 are all %vec0[1]. The input is produced by inline asm and the
; <4 x i64> result is stored to %ptr.
2423 define void @v_shuffle_v4i64_v3i64__u_1_1_1(ptr addrspace(1) inreg %ptr) {
2424 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__u_1_1_1:
2426 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2427 ; GFX900-NEXT: ;;#ASMSTART
2428 ; GFX900-NEXT: ; def v[0:5]
2429 ; GFX900-NEXT: ;;#ASMEND
2430 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
2431 ; GFX900-NEXT: v_mov_b32_e32 v4, v2
2432 ; GFX900-NEXT: v_mov_b32_e32 v5, v3
2433 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
2434 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
2435 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2436 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2438 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__u_1_1_1:
2440 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2441 ; GFX90A-NEXT: ;;#ASMSTART
2442 ; GFX90A-NEXT: ; def v[0:5]
2443 ; GFX90A-NEXT: ;;#ASMEND
2444 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
2445 ; GFX90A-NEXT: v_mov_b32_e32 v4, v2
2446 ; GFX90A-NEXT: v_mov_b32_e32 v5, v3
2447 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
2448 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
2449 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2450 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2452 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_1_1_1:
2454 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2455 ; GFX940-NEXT: ;;#ASMSTART
2456 ; GFX940-NEXT: ; def v[0:5]
2457 ; GFX940-NEXT: ;;#ASMEND
2458 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
2459 ; GFX940-NEXT: v_mov_b32_e32 v4, v2
2460 ; GFX940-NEXT: v_mov_b32_e32 v5, v3
2461 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
2462 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
2463 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2464 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2465 %vec0 = call <3 x i64> asm "; def $0", "=v"()
2466 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
2467 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle of one inline-asm-defined <3 x i64> (%vec0) with a poison second operand,
; mask <0, 1, 1, 1>: result lane 0 takes element 0 of %vec0 and lanes 1-3 all take
; element 1. The <4 x i64> result is stored to %ptr.
2471 define void @v_shuffle_v4i64_v3i64__0_1_1_1(ptr addrspace(1) inreg %ptr) {
2472 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_1_1_1:
2474 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2475 ; GFX900-NEXT: ;;#ASMSTART
2476 ; GFX900-NEXT: ; def v[0:5]
2477 ; GFX900-NEXT: ;;#ASMEND
2478 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
2479 ; GFX900-NEXT: v_mov_b32_e32 v4, v2
2480 ; GFX900-NEXT: v_mov_b32_e32 v5, v3
2481 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
2482 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
2483 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2484 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2486 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_1_1_1:
2488 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2489 ; GFX90A-NEXT: ;;#ASMSTART
2490 ; GFX90A-NEXT: ; def v[0:5]
2491 ; GFX90A-NEXT: ;;#ASMEND
2492 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
2493 ; GFX90A-NEXT: v_mov_b32_e32 v4, v2
2494 ; GFX90A-NEXT: v_mov_b32_e32 v5, v3
2495 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
2496 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
2497 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2498 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2500 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_1_1_1:
2502 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2503 ; GFX940-NEXT: ;;#ASMSTART
2504 ; GFX940-NEXT: ; def v[0:5]
2505 ; GFX940-NEXT: ;;#ASMEND
2506 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
2507 ; GFX940-NEXT: v_mov_b32_e32 v4, v2
2508 ; GFX940-NEXT: v_mov_b32_e32 v5, v3
2509 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
2510 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
2511 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2512 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2513 %vec0 = call <3 x i64> asm "; def $0", "=v"()
2514 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
2515 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle of one inline-asm-defined <3 x i64> (%vec0) with a poison second operand,
; mask <1, 1, 1, 1>: every result lane takes element 1 of %vec0 (a splat of that
; element). The <4 x i64> result is stored to %ptr.
2519 define void @v_shuffle_v4i64_v3i64__1_1_1_1(ptr addrspace(1) inreg %ptr) {
2520 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_1_1_1:
2522 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2523 ; GFX900-NEXT: ;;#ASMSTART
2524 ; GFX900-NEXT: ; def v[0:5]
2525 ; GFX900-NEXT: ;;#ASMEND
2526 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
2527 ; GFX900-NEXT: v_mov_b32_e32 v4, v2
2528 ; GFX900-NEXT: v_mov_b32_e32 v5, v3
2529 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
2530 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
2531 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2532 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2534 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_1_1_1:
2536 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2537 ; GFX90A-NEXT: ;;#ASMSTART
2538 ; GFX90A-NEXT: ; def v[0:5]
2539 ; GFX90A-NEXT: ;;#ASMEND
2540 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
2541 ; GFX90A-NEXT: v_mov_b32_e32 v4, v2
2542 ; GFX90A-NEXT: v_mov_b32_e32 v5, v3
2543 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
2544 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
2545 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2546 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2548 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_1_1_1:
2550 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2551 ; GFX940-NEXT: ;;#ASMSTART
2552 ; GFX940-NEXT: ; def v[0:5]
2553 ; GFX940-NEXT: ;;#ASMEND
2554 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
2555 ; GFX940-NEXT: v_mov_b32_e32 v4, v2
2556 ; GFX940-NEXT: v_mov_b32_e32 v5, v3
2557 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
2558 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
2559 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2560 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2561 %vec0 = call <3 x i64> asm "; def $0", "=v"()
2562 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
2563 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle of one inline-asm-defined <3 x i64> (%vec0) with a poison second operand,
; mask <2, 1, 1, 1>: result lane 0 takes element 2 of %vec0 and lanes 1-3 all take
; element 1. The <4 x i64> result is stored to %ptr.
2567 define void @v_shuffle_v4i64_v3i64__2_1_1_1(ptr addrspace(1) inreg %ptr) {
2568 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_1_1_1:
2570 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2571 ; GFX900-NEXT: ;;#ASMSTART
2572 ; GFX900-NEXT: ; def v[0:5]
2573 ; GFX900-NEXT: ;;#ASMEND
2574 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
2575 ; GFX900-NEXT: v_mov_b32_e32 v0, v2
2576 ; GFX900-NEXT: v_mov_b32_e32 v1, v3
2577 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
2578 ; GFX900-NEXT: s_nop 0
2579 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
2580 ; GFX900-NEXT: v_mov_b32_e32 v1, v5
2581 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
2582 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2583 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2585 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_1_1_1:
2587 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2588 ; GFX90A-NEXT: ;;#ASMSTART
2589 ; GFX90A-NEXT: ; def v[0:5]
2590 ; GFX90A-NEXT: ;;#ASMEND
2591 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
2592 ; GFX90A-NEXT: v_mov_b32_e32 v0, v2
2593 ; GFX90A-NEXT: v_mov_b32_e32 v1, v3
2594 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
2595 ; GFX90A-NEXT: s_nop 0
2596 ; GFX90A-NEXT: v_mov_b32_e32 v0, v4
2597 ; GFX90A-NEXT: v_mov_b32_e32 v1, v5
2598 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
2599 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2600 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2602 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_1_1_1:
2604 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2605 ; GFX940-NEXT: ;;#ASMSTART
2606 ; GFX940-NEXT: ; def v[0:5]
2607 ; GFX940-NEXT: ;;#ASMEND
2608 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
2609 ; GFX940-NEXT: v_mov_b32_e32 v0, v2
2610 ; GFX940-NEXT: v_mov_b32_e32 v1, v3
2611 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
2612 ; GFX940-NEXT: s_nop 1
2613 ; GFX940-NEXT: v_mov_b32_e32 v0, v4
2614 ; GFX940-NEXT: v_mov_b32_e32 v1, v5
2615 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
2616 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2617 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2618 %vec0 = call <3 x i64> asm "; def $0", "=v"()
2619 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
2620 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle of one inline-asm-defined <3 x i64> (%vec0) with a poison second operand,
; mask <3, 1, 1, 1>: mask index 3 names element 0 of the second operand, which is
; poison here; lanes 1-3 all take element 1 of %vec0. The <4 x i64> result is
; stored to %ptr.
2624 define void @v_shuffle_v4i64_v3i64__3_1_1_1(ptr addrspace(1) inreg %ptr) {
2625 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__3_1_1_1:
2627 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2628 ; GFX900-NEXT: ;;#ASMSTART
2629 ; GFX900-NEXT: ; def v[0:5]
2630 ; GFX900-NEXT: ;;#ASMEND
2631 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
2632 ; GFX900-NEXT: v_mov_b32_e32 v4, v2
2633 ; GFX900-NEXT: v_mov_b32_e32 v5, v3
2634 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
2635 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
2636 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2637 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2639 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__3_1_1_1:
2641 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2642 ; GFX90A-NEXT: ;;#ASMSTART
2643 ; GFX90A-NEXT: ; def v[0:5]
2644 ; GFX90A-NEXT: ;;#ASMEND
2645 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
2646 ; GFX90A-NEXT: v_mov_b32_e32 v4, v2
2647 ; GFX90A-NEXT: v_mov_b32_e32 v5, v3
2648 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
2649 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
2650 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2651 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2653 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_1_1_1:
2655 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2656 ; GFX940-NEXT: ;;#ASMSTART
2657 ; GFX940-NEXT: ; def v[0:5]
2658 ; GFX940-NEXT: ;;#ASMEND
2659 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
2660 ; GFX940-NEXT: v_mov_b32_e32 v4, v2
2661 ; GFX940-NEXT: v_mov_b32_e32 v5, v3
2662 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
2663 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
2664 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2665 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2666 %vec0 = call <3 x i64> asm "; def $0", "=v"()
2667 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
2668 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle of two inline-asm-defined <3 x i64> values (%vec0, %vec1), mask <4, 1, 1, 1>:
; result lane 0 takes element 1 of %vec1 (mask index 4) and lanes 1-3 all take
; element 1 of %vec0. The <4 x i64> result is stored to %ptr.
2672 define void @v_shuffle_v4i64_v3i64__4_1_1_1(ptr addrspace(1) inreg %ptr) {
2673 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_1_1_1:
2675 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2676 ; GFX900-NEXT: ;;#ASMSTART
2677 ; GFX900-NEXT: ; def v[0:5]
2678 ; GFX900-NEXT: ;;#ASMEND
2679 ; GFX900-NEXT: ;;#ASMSTART
2680 ; GFX900-NEXT: ; def v[4:9]
2681 ; GFX900-NEXT: ;;#ASMEND
2682 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
2683 ; GFX900-NEXT: v_mov_b32_e32 v4, v2
2684 ; GFX900-NEXT: v_mov_b32_e32 v5, v3
2685 ; GFX900-NEXT: v_mov_b32_e32 v8, v2
2686 ; GFX900-NEXT: v_mov_b32_e32 v9, v3
2687 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2688 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
2689 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2690 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2692 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_1_1_1:
2694 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2695 ; GFX90A-NEXT: ;;#ASMSTART
2696 ; GFX90A-NEXT: ; def v[0:5]
2697 ; GFX90A-NEXT: ;;#ASMEND
2698 ; GFX90A-NEXT: ;;#ASMSTART
2699 ; GFX90A-NEXT: ; def v[4:9]
2700 ; GFX90A-NEXT: ;;#ASMEND
2701 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
2702 ; GFX90A-NEXT: v_mov_b32_e32 v4, v2
2703 ; GFX90A-NEXT: v_mov_b32_e32 v5, v3
2704 ; GFX90A-NEXT: v_mov_b32_e32 v8, v2
2705 ; GFX90A-NEXT: v_mov_b32_e32 v9, v3
2706 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2707 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
2708 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2709 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2711 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_1_1_1:
2713 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2714 ; GFX940-NEXT: ;;#ASMSTART
2715 ; GFX940-NEXT: ; def v[0:5]
2716 ; GFX940-NEXT: ;;#ASMEND
2717 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
2718 ; GFX940-NEXT: ;;#ASMSTART
2719 ; GFX940-NEXT: ; def v[4:9]
2720 ; GFX940-NEXT: ;;#ASMEND
2721 ; GFX940-NEXT: s_nop 0
2722 ; GFX940-NEXT: v_mov_b32_e32 v4, v2
2723 ; GFX940-NEXT: v_mov_b32_e32 v5, v3
2724 ; GFX940-NEXT: v_mov_b32_e32 v8, v2
2725 ; GFX940-NEXT: v_mov_b32_e32 v9, v3
2726 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
2727 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
2728 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2729 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2730 %vec0 = call <3 x i64> asm "; def $0", "=v"()
2731 %vec1 = call <3 x i64> asm "; def $0", "=v"()
2732 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
2733 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle of two inline-asm-defined <3 x i64> values (%vec0, %vec1), mask <5, 1, 1, 1>:
; result lane 0 takes element 2 of %vec1 (mask index 5) and lanes 1-3 all take
; element 1 of %vec0. The <4 x i64> result is stored to %ptr.
2737 define void @v_shuffle_v4i64_v3i64__5_1_1_1(ptr addrspace(1) inreg %ptr) {
2738 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_1_1:
2740 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2741 ; GFX900-NEXT: ;;#ASMSTART
2742 ; GFX900-NEXT: ; def v[0:5]
2743 ; GFX900-NEXT: ;;#ASMEND
2744 ; GFX900-NEXT: ;;#ASMSTART
2745 ; GFX900-NEXT: ; def v[4:9]
2746 ; GFX900-NEXT: ;;#ASMEND
2747 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
2748 ; GFX900-NEXT: v_mov_b32_e32 v4, v2
2749 ; GFX900-NEXT: v_mov_b32_e32 v5, v3
2750 ; GFX900-NEXT: v_mov_b32_e32 v0, v8
2751 ; GFX900-NEXT: v_mov_b32_e32 v1, v9
2752 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2753 ; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
2754 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2755 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2757 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_1_1:
2759 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2760 ; GFX90A-NEXT: ;;#ASMSTART
2761 ; GFX90A-NEXT: ; def v[0:5]
2762 ; GFX90A-NEXT: ;;#ASMEND
2763 ; GFX90A-NEXT: ;;#ASMSTART
2764 ; GFX90A-NEXT: ; def v[4:9]
2765 ; GFX90A-NEXT: ;;#ASMEND
2766 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
2767 ; GFX90A-NEXT: v_mov_b32_e32 v4, v2
2768 ; GFX90A-NEXT: v_mov_b32_e32 v5, v3
2769 ; GFX90A-NEXT: v_mov_b32_e32 v0, v8
2770 ; GFX90A-NEXT: v_mov_b32_e32 v1, v9
2771 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2772 ; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
2773 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2774 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2776 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_1_1:
2778 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2779 ; GFX940-NEXT: ;;#ASMSTART
2780 ; GFX940-NEXT: ; def v[0:5]
2781 ; GFX940-NEXT: ;;#ASMEND
2782 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
2783 ; GFX940-NEXT: ;;#ASMSTART
2784 ; GFX940-NEXT: ; def v[4:9]
2785 ; GFX940-NEXT: ;;#ASMEND
2786 ; GFX940-NEXT: s_nop 0
2787 ; GFX940-NEXT: v_mov_b32_e32 v4, v2
2788 ; GFX940-NEXT: v_mov_b32_e32 v5, v3
2789 ; GFX940-NEXT: v_mov_b32_e32 v0, v8
2790 ; GFX940-NEXT: v_mov_b32_e32 v1, v9
2791 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
2792 ; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
2793 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2794 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2795 %vec0 = call <3 x i64> asm "; def $0", "=v"()
2796 %vec1 = call <3 x i64> asm "; def $0", "=v"()
2797 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
2798 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle of two inline-asm-defined <3 x i64> values (%vec0, %vec1),
; mask <5, poison, 1, 1>: result lane 0 takes element 2 of %vec1 (mask index 5),
; lane 1 is left unspecified, and lanes 2-3 take element 1 of %vec0. The <4 x i64>
; result is stored to %ptr.
2802 define void @v_shuffle_v4i64_v3i64__5_u_1_1(ptr addrspace(1) inreg %ptr) {
2803 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_1_1:
2805 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2806 ; GFX900-NEXT: ;;#ASMSTART
2807 ; GFX900-NEXT: ; def v[0:5]
2808 ; GFX900-NEXT: ;;#ASMEND
2809 ; GFX900-NEXT: ;;#ASMSTART
2810 ; GFX900-NEXT: ; def v[4:9]
2811 ; GFX900-NEXT: ;;#ASMEND
2812 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
2813 ; GFX900-NEXT: v_mov_b32_e32 v4, v2
2814 ; GFX900-NEXT: v_mov_b32_e32 v5, v3
2815 ; GFX900-NEXT: v_mov_b32_e32 v0, v8
2816 ; GFX900-NEXT: v_mov_b32_e32 v1, v9
2817 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2818 ; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
2819 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2820 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2822 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_1_1:
2824 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2825 ; GFX90A-NEXT: ;;#ASMSTART
2826 ; GFX90A-NEXT: ; def v[0:5]
2827 ; GFX90A-NEXT: ;;#ASMEND
2828 ; GFX90A-NEXT: ;;#ASMSTART
2829 ; GFX90A-NEXT: ; def v[4:9]
2830 ; GFX90A-NEXT: ;;#ASMEND
2831 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
2832 ; GFX90A-NEXT: v_mov_b32_e32 v4, v2
2833 ; GFX90A-NEXT: v_mov_b32_e32 v5, v3
2834 ; GFX90A-NEXT: v_mov_b32_e32 v0, v8
2835 ; GFX90A-NEXT: v_mov_b32_e32 v1, v9
2836 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2837 ; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
2838 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2839 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2841 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_1_1:
2843 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2844 ; GFX940-NEXT: ;;#ASMSTART
2845 ; GFX940-NEXT: ; def v[0:5]
2846 ; GFX940-NEXT: ;;#ASMEND
2847 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
2848 ; GFX940-NEXT: ;;#ASMSTART
2849 ; GFX940-NEXT: ; def v[4:9]
2850 ; GFX940-NEXT: ;;#ASMEND
2851 ; GFX940-NEXT: s_nop 0
2852 ; GFX940-NEXT: v_mov_b32_e32 v4, v2
2853 ; GFX940-NEXT: v_mov_b32_e32 v5, v3
2854 ; GFX940-NEXT: v_mov_b32_e32 v0, v8
2855 ; GFX940-NEXT: v_mov_b32_e32 v1, v9
2856 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
2857 ; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
2858 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2859 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2860 %vec0 = call <3 x i64> asm "; def $0", "=v"()
2861 %vec1 = call <3 x i64> asm "; def $0", "=v"()
2862 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 1, i32 1>
2863 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle of two inline-asm-defined <3 x i64> values (%vec0, %vec1), mask <5, 0, 1, 1>:
; result lane 0 takes element 2 of %vec1 (mask index 5), lane 1 takes element 0 of
; %vec0, and lanes 2-3 take element 1 of %vec0. The <4 x i64> result is stored to %ptr.
2867 define void @v_shuffle_v4i64_v3i64__5_0_1_1(ptr addrspace(1) inreg %ptr) {
2868 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_1_1:
2870 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2871 ; GFX900-NEXT: ;;#ASMSTART
2872 ; GFX900-NEXT: ; def v[0:5]
2873 ; GFX900-NEXT: ;;#ASMEND
2874 ; GFX900-NEXT: ;;#ASMSTART
2875 ; GFX900-NEXT: ; def v[4:9]
2876 ; GFX900-NEXT: ;;#ASMEND
2877 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
2878 ; GFX900-NEXT: v_mov_b32_e32 v4, v2
2879 ; GFX900-NEXT: v_mov_b32_e32 v5, v3
2880 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2881 ; GFX900-NEXT: s_nop 0
2882 ; GFX900-NEXT: v_mov_b32_e32 v2, v8
2883 ; GFX900-NEXT: v_mov_b32_e32 v3, v9
2884 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
2885 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
2886 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
2887 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2888 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2890 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_1_1:
2892 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2893 ; GFX90A-NEXT: ;;#ASMSTART
2894 ; GFX90A-NEXT: ; def v[0:5]
2895 ; GFX90A-NEXT: ;;#ASMEND
2896 ; GFX90A-NEXT: ;;#ASMSTART
2897 ; GFX90A-NEXT: ; def v[4:9]
2898 ; GFX90A-NEXT: ;;#ASMEND
2899 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
2900 ; GFX90A-NEXT: v_mov_b32_e32 v4, v2
2901 ; GFX90A-NEXT: v_mov_b32_e32 v5, v3
2902 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2903 ; GFX90A-NEXT: s_nop 0
2904 ; GFX90A-NEXT: v_mov_b32_e32 v2, v8
2905 ; GFX90A-NEXT: v_mov_b32_e32 v3, v9
2906 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
2907 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
2908 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
2909 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2910 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2912 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_1_1:
2914 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2915 ; GFX940-NEXT: ;;#ASMSTART
2916 ; GFX940-NEXT: ; def v[0:5]
2917 ; GFX940-NEXT: ;;#ASMEND
2918 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
2919 ; GFX940-NEXT: ;;#ASMSTART
2920 ; GFX940-NEXT: ; def v[4:9]
2921 ; GFX940-NEXT: ;;#ASMEND
2922 ; GFX940-NEXT: s_nop 0
2923 ; GFX940-NEXT: v_mov_b32_e32 v4, v2
2924 ; GFX940-NEXT: v_mov_b32_e32 v5, v3
2925 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
2926 ; GFX940-NEXT: s_nop 1
2927 ; GFX940-NEXT: v_mov_b32_e32 v2, v8
2928 ; GFX940-NEXT: v_mov_b32_e32 v3, v9
2929 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
2930 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
2931 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1
2932 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2933 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2934 %vec0 = call <3 x i64> asm "; def $0", "=v"()
2935 %vec1 = call <3 x i64> asm "; def $0", "=v"()
2936 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 1, i32 1>
2937 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle of two inline-asm-defined <3 x i64> values (%vec0, %vec1), mask <5, 2, 1, 1>:
; result lane 0 takes element 2 of %vec1 (mask index 5), lane 1 takes element 2 of
; %vec0, and lanes 2-3 take element 1 of %vec0. The <4 x i64> result is stored to %ptr.
2941 define void @v_shuffle_v4i64_v3i64__5_2_1_1(ptr addrspace(1) inreg %ptr) {
2942 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_1_1:
2944 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2945 ; GFX900-NEXT: ;;#ASMSTART
2946 ; GFX900-NEXT: ; def v[0:5]
2947 ; GFX900-NEXT: ;;#ASMEND
2948 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
2949 ; GFX900-NEXT: v_mov_b32_e32 v0, v2
2950 ; GFX900-NEXT: v_mov_b32_e32 v1, v3
2951 ; GFX900-NEXT: ;;#ASMSTART
2952 ; GFX900-NEXT: ; def v[6:11]
2953 ; GFX900-NEXT: ;;#ASMEND
2954 ; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
2955 ; GFX900-NEXT: s_nop 0
2956 ; GFX900-NEXT: v_mov_b32_e32 v2, v10
2957 ; GFX900-NEXT: v_mov_b32_e32 v3, v11
2958 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
2959 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2960 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2962 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_1_1:
2964 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2965 ; GFX90A-NEXT: ;;#ASMSTART
2966 ; GFX90A-NEXT: ; def v[0:5]
2967 ; GFX90A-NEXT: ;;#ASMEND
2968 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
2969 ; GFX90A-NEXT: v_mov_b32_e32 v0, v2
2970 ; GFX90A-NEXT: v_mov_b32_e32 v1, v3
2971 ; GFX90A-NEXT: ;;#ASMSTART
2972 ; GFX90A-NEXT: ; def v[6:11]
2973 ; GFX90A-NEXT: ;;#ASMEND
2974 ; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
2975 ; GFX90A-NEXT: s_nop 0
2976 ; GFX90A-NEXT: v_mov_b32_e32 v2, v10
2977 ; GFX90A-NEXT: v_mov_b32_e32 v3, v11
2978 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
2979 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2980 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2982 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_1_1:
2984 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2985 ; GFX940-NEXT: ;;#ASMSTART
2986 ; GFX940-NEXT: ; def v[0:5]
2987 ; GFX940-NEXT: ;;#ASMEND
2988 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
2989 ; GFX940-NEXT: v_mov_b32_e32 v0, v2
2990 ; GFX940-NEXT: v_mov_b32_e32 v1, v3
2991 ; GFX940-NEXT: ;;#ASMSTART
2992 ; GFX940-NEXT: ; def v[6:11]
2993 ; GFX940-NEXT: ;;#ASMEND
2994 ; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1
2995 ; GFX940-NEXT: s_nop 1
2996 ; GFX940-NEXT: v_mov_b32_e32 v2, v10
2997 ; GFX940-NEXT: v_mov_b32_e32 v3, v11
2998 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
2999 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3000 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3001 %vec0 = call <3 x i64> asm "; def $0", "=v"()
3002 %vec1 = call <3 x i64> asm "; def $0", "=v"()
3003 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 1, i32 1>
3004 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle of two inline-asm-defined <3 x i64> values (%vec0, %vec1), mask <5, 3, 1, 1>:
; result lane 0 takes element 2 of %vec1 (mask index 5), lane 1 takes element 0 of
; %vec1 (mask index 3), and lanes 2-3 take element 1 of %vec0. The <4 x i64> result
; is stored to %ptr.
3008 define void @v_shuffle_v4i64_v3i64__5_3_1_1(ptr addrspace(1) inreg %ptr) {
3009 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_1_1:
3011 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3012 ; GFX900-NEXT: ;;#ASMSTART
3013 ; GFX900-NEXT: ; def v[0:5]
3014 ; GFX900-NEXT: ;;#ASMEND
3015 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
3016 ; GFX900-NEXT: v_mov_b32_e32 v4, v2
3017 ; GFX900-NEXT: v_mov_b32_e32 v5, v3
3018 ; GFX900-NEXT: ;;#ASMSTART
3019 ; GFX900-NEXT: ; def v[6:11]
3020 ; GFX900-NEXT: ;;#ASMEND
3021 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3022 ; GFX900-NEXT: v_mov_b32_e32 v0, v10
3023 ; GFX900-NEXT: v_mov_b32_e32 v1, v11
3024 ; GFX900-NEXT: v_mov_b32_e32 v2, v6
3025 ; GFX900-NEXT: v_mov_b32_e32 v3, v7
3026 ; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
3027 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3028 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3030 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_1_1:
3032 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3033 ; GFX90A-NEXT: ;;#ASMSTART
3034 ; GFX90A-NEXT: ; def v[0:5]
3035 ; GFX90A-NEXT: ;;#ASMEND
3036 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
3037 ; GFX90A-NEXT: v_mov_b32_e32 v4, v2
3038 ; GFX90A-NEXT: v_mov_b32_e32 v5, v3
3039 ; GFX90A-NEXT: ;;#ASMSTART
3040 ; GFX90A-NEXT: ; def v[6:11]
3041 ; GFX90A-NEXT: ;;#ASMEND
3042 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3043 ; GFX90A-NEXT: v_mov_b32_e32 v0, v10
3044 ; GFX90A-NEXT: v_mov_b32_e32 v1, v11
3045 ; GFX90A-NEXT: v_mov_b32_e32 v2, v6
3046 ; GFX90A-NEXT: v_mov_b32_e32 v3, v7
3047 ; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
3048 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3049 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3051 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_1_1:
3053 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3054 ; GFX940-NEXT: ;;#ASMSTART
3055 ; GFX940-NEXT: ; def v[0:5]
3056 ; GFX940-NEXT: ;;#ASMEND
3057 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
3058 ; GFX940-NEXT: v_mov_b32_e32 v4, v2
3059 ; GFX940-NEXT: v_mov_b32_e32 v5, v3
3060 ; GFX940-NEXT: ;;#ASMSTART
3061 ; GFX940-NEXT: ; def v[6:11]
3062 ; GFX940-NEXT: ;;#ASMEND
3063 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
3064 ; GFX940-NEXT: v_mov_b32_e32 v0, v10
3065 ; GFX940-NEXT: v_mov_b32_e32 v1, v11
3066 ; GFX940-NEXT: v_mov_b32_e32 v2, v6
3067 ; GFX940-NEXT: v_mov_b32_e32 v3, v7
3068 ; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1
3069 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3070 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3071 %vec0 = call <3 x i64> asm "; def $0", "=v"()
3072 %vec1 = call <3 x i64> asm "; def $0", "=v"()
3073 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 1, i32 1>
3074 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle of two inline-asm-defined <3 x i64> values (%vec0, %vec1), mask <5, 4, 1, 1>:
; result lane 0 takes element 2 of %vec1 (mask index 5), lane 1 takes element 1 of
; %vec1 (mask index 4), and lanes 2-3 take element 1 of %vec0. The <4 x i64> result
; is stored to %ptr.
3078 define void @v_shuffle_v4i64_v3i64__5_4_1_1(ptr addrspace(1) inreg %ptr) {
3079 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_1_1:
3081 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3082 ; GFX900-NEXT: ;;#ASMSTART
3083 ; GFX900-NEXT: ; def v[0:5]
3084 ; GFX900-NEXT: ;;#ASMEND
3085 ; GFX900-NEXT: ;;#ASMSTART
3086 ; GFX900-NEXT: ; def v[4:9]
3087 ; GFX900-NEXT: ;;#ASMEND
3088 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
3089 ; GFX900-NEXT: v_mov_b32_e32 v4, v2
3090 ; GFX900-NEXT: v_mov_b32_e32 v5, v3
3091 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
3092 ; GFX900-NEXT: s_nop 0
3093 ; GFX900-NEXT: v_mov_b32_e32 v4, v8
3094 ; GFX900-NEXT: v_mov_b32_e32 v5, v9
3095 ; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
3096 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3097 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3099 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_1_1:
3101 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3102 ; GFX90A-NEXT: ;;#ASMSTART
3103 ; GFX90A-NEXT: ; def v[0:5]
3104 ; GFX90A-NEXT: ;;#ASMEND
3105 ; GFX90A-NEXT: ;;#ASMSTART
3106 ; GFX90A-NEXT: ; def v[4:9]
3107 ; GFX90A-NEXT: ;;#ASMEND
3108 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
3109 ; GFX90A-NEXT: v_mov_b32_e32 v4, v2
3110 ; GFX90A-NEXT: v_mov_b32_e32 v5, v3
3111 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
3112 ; GFX90A-NEXT: s_nop 0
3113 ; GFX90A-NEXT: v_mov_b32_e32 v4, v8
3114 ; GFX90A-NEXT: v_mov_b32_e32 v5, v9
3115 ; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
3116 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3117 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3119 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_1_1:
3121 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3122 ; GFX940-NEXT: ;;#ASMSTART
3123 ; GFX940-NEXT: ; def v[0:5]
3124 ; GFX940-NEXT: ;;#ASMEND
3125 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
3126 ; GFX940-NEXT: ;;#ASMSTART
3127 ; GFX940-NEXT: ; def v[4:9]
3128 ; GFX940-NEXT: ;;#ASMEND
3129 ; GFX940-NEXT: s_nop 0
3130 ; GFX940-NEXT: v_mov_b32_e32 v4, v2
3131 ; GFX940-NEXT: v_mov_b32_e32 v5, v3
3132 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
3133 ; GFX940-NEXT: s_nop 1
3134 ; GFX940-NEXT: v_mov_b32_e32 v4, v8
3135 ; GFX940-NEXT: v_mov_b32_e32 v5, v9
3136 ; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1
3137 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3138 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3139 %vec0 = call <3 x i64> asm "; def $0", "=v"()
3140 %vec1 = call <3 x i64> asm "; def $0", "=v"()
3141 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 1, i32 1>
3142 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle of two inline-asm-defined <3 x i64> values (%vec0, %vec1), mask <5, 5, 1, 1>:
; result lanes 0-1 both take element 2 of %vec1 (mask index 5) and lanes 2-3 both
; take element 1 of %vec0. The <4 x i64> result is stored to %ptr.
3146 define void @v_shuffle_v4i64_v3i64__5_5_1_1(ptr addrspace(1) inreg %ptr) {
3147 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_1:
3149 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3150 ; GFX900-NEXT: ;;#ASMSTART
3151 ; GFX900-NEXT: ; def v[0:5]
3152 ; GFX900-NEXT: ;;#ASMEND
3153 ; GFX900-NEXT: ;;#ASMSTART
3154 ; GFX900-NEXT: ; def v[4:9]
3155 ; GFX900-NEXT: ;;#ASMEND
3156 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
3157 ; GFX900-NEXT: v_mov_b32_e32 v4, v2
3158 ; GFX900-NEXT: v_mov_b32_e32 v5, v3
3159 ; GFX900-NEXT: v_mov_b32_e32 v6, v8
3160 ; GFX900-NEXT: v_mov_b32_e32 v7, v9
3161 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
3162 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
3163 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3164 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3166 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_1:
3168 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3169 ; GFX90A-NEXT: ;;#ASMSTART
3170 ; GFX90A-NEXT: ; def v[0:5]
3171 ; GFX90A-NEXT: ;;#ASMEND
3172 ; GFX90A-NEXT: ;;#ASMSTART
3173 ; GFX90A-NEXT: ; def v[4:9]
3174 ; GFX90A-NEXT: ;;#ASMEND
3175 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
3176 ; GFX90A-NEXT: v_mov_b32_e32 v4, v2
3177 ; GFX90A-NEXT: v_mov_b32_e32 v5, v3
3178 ; GFX90A-NEXT: v_mov_b32_e32 v6, v8
3179 ; GFX90A-NEXT: v_mov_b32_e32 v7, v9
3180 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
3181 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
3182 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3183 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3185 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_1:
3187 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3188 ; GFX940-NEXT: ;;#ASMSTART
3189 ; GFX940-NEXT: ; def v[0:5]
3190 ; GFX940-NEXT: ;;#ASMEND
3191 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
3192 ; GFX940-NEXT: ;;#ASMSTART
3193 ; GFX940-NEXT: ; def v[4:9]
3194 ; GFX940-NEXT: ;;#ASMEND
3195 ; GFX940-NEXT: s_nop 0
3196 ; GFX940-NEXT: v_mov_b32_e32 v4, v2
3197 ; GFX940-NEXT: v_mov_b32_e32 v5, v3
3198 ; GFX940-NEXT: v_mov_b32_e32 v6, v8
3199 ; GFX940-NEXT: v_mov_b32_e32 v7, v9
3200 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
3201 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
3202 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3203 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3204 %vec0 = call <3 x i64> asm "; def $0", "=v"()
3205 %vec1 = call <3 x i64> asm "; def $0", "=v"()
3206 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 1>
3207 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle of two inline-asm-defined <3 x i64> values (%vec0, %vec1),
; mask <5, 5, poison, 1>: result lanes 0-1 both take element 2 of %vec1 (mask
; index 5), lane 2 is left unspecified, and lane 3 takes element 1 of %vec0.
; The <4 x i64> result is stored to %ptr.
3211 define void @v_shuffle_v4i64_v3i64__5_5_u_1(ptr addrspace(1) inreg %ptr) {
3212 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_1:
3214 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3215 ; GFX900-NEXT: ;;#ASMSTART
3216 ; GFX900-NEXT: ; def v[0:5]
3217 ; GFX900-NEXT: ;;#ASMEND
3218 ; GFX900-NEXT: ;;#ASMSTART
3219 ; GFX900-NEXT: ; def v[4:9]
3220 ; GFX900-NEXT: ;;#ASMEND
3221 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
3222 ; GFX900-NEXT: v_mov_b32_e32 v6, v8
3223 ; GFX900-NEXT: v_mov_b32_e32 v7, v9
3224 ; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
3225 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
3226 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3227 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3229 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_1:
3231 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3232 ; GFX90A-NEXT: ;;#ASMSTART
3233 ; GFX90A-NEXT: ; def v[0:5]
3234 ; GFX90A-NEXT: ;;#ASMEND
3235 ; GFX90A-NEXT: ;;#ASMSTART
3236 ; GFX90A-NEXT: ; def v[4:9]
3237 ; GFX90A-NEXT: ;;#ASMEND
3238 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
3239 ; GFX90A-NEXT: v_mov_b32_e32 v6, v8
3240 ; GFX90A-NEXT: v_mov_b32_e32 v7, v9
3241 ; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
3242 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
3243 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3244 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3246 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_1:
3248 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3249 ; GFX940-NEXT: ;;#ASMSTART
3250 ; GFX940-NEXT: ; def v[0:5]
3251 ; GFX940-NEXT: ;;#ASMEND
3252 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
3253 ; GFX940-NEXT: ;;#ASMSTART
3254 ; GFX940-NEXT: ; def v[4:9]
3255 ; GFX940-NEXT: ;;#ASMEND
3256 ; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1
3257 ; GFX940-NEXT: v_mov_b32_e32 v6, v8
3258 ; GFX940-NEXT: v_mov_b32_e32 v7, v9
3259 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
3260 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3261 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3262 %vec0 = call <3 x i64> asm "; def $0", "=v"()
3263 %vec1 = call <3 x i64> asm "; def $0", "=v"()
3264 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 1>
3265 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle of two inline-asm-defined <3 x i64> values (%vec0, %vec1), mask <5, 5, 0, 1>:
; result lanes 0-1 both take element 2 of %vec1 (mask index 5), lane 2 takes
; element 0 of %vec0, and lane 3 takes element 1 of %vec0. The <4 x i64> result is
; stored to %ptr.
3269 define void @v_shuffle_v4i64_v3i64__5_5_0_1(ptr addrspace(1) inreg %ptr) {
3270 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_1:
3272 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3273 ; GFX900-NEXT: ;;#ASMSTART
3274 ; GFX900-NEXT: ; def v[0:5]
3275 ; GFX900-NEXT: ;;#ASMEND
3276 ; GFX900-NEXT: ;;#ASMSTART
3277 ; GFX900-NEXT: ; def v[4:9]
3278 ; GFX900-NEXT: ;;#ASMEND
3279 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
3280 ; GFX900-NEXT: v_mov_b32_e32 v6, v8
3281 ; GFX900-NEXT: v_mov_b32_e32 v7, v9
3282 ; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
3283 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
3284 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3285 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3287 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_1:
3289 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3290 ; GFX90A-NEXT: ;;#ASMSTART
3291 ; GFX90A-NEXT: ; def v[0:5]
3292 ; GFX90A-NEXT: ;;#ASMEND
3293 ; GFX90A-NEXT: ;;#ASMSTART
3294 ; GFX90A-NEXT: ; def v[4:9]
3295 ; GFX90A-NEXT: ;;#ASMEND
3296 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
3297 ; GFX90A-NEXT: v_mov_b32_e32 v6, v8
3298 ; GFX90A-NEXT: v_mov_b32_e32 v7, v9
3299 ; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
3300 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
3301 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3302 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3304 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_1:
3306 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3307 ; GFX940-NEXT: ;;#ASMSTART
3308 ; GFX940-NEXT: ; def v[0:5]
3309 ; GFX940-NEXT: ;;#ASMEND
3310 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
3311 ; GFX940-NEXT: ;;#ASMSTART
3312 ; GFX940-NEXT: ; def v[4:9]
3313 ; GFX940-NEXT: ;;#ASMEND
3314 ; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1
3315 ; GFX940-NEXT: v_mov_b32_e32 v6, v8
3316 ; GFX940-NEXT: v_mov_b32_e32 v7, v9
3317 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
3318 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3319 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3320 %vec0 = call <3 x i64> asm "; def $0", "=v"()
3321 %vec1 = call <3 x i64> asm "; def $0", "=v"()
3322 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 1>
3323 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Codegen test: shuffle two inline-asm-defined <3 x i64> vectors into a
; <4 x i64> using mask <5, 5, 2, 1> and store the result to %ptr.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py.
3327 define void @v_shuffle_v4i64_v3i64__5_5_2_1(ptr addrspace(1) inreg %ptr) {
3328 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_1:
3330 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3331 ; GFX900-NEXT: ;;#ASMSTART
3332 ; GFX900-NEXT: ; def v[0:5]
3333 ; GFX900-NEXT: ;;#ASMEND
3334 ; GFX900-NEXT: ;;#ASMSTART
3335 ; GFX900-NEXT: ; def v[6:11]
3336 ; GFX900-NEXT: ;;#ASMEND
3337 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
3338 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
3339 ; GFX900-NEXT: v_mov_b32_e32 v1, v5
3340 ; GFX900-NEXT: v_mov_b32_e32 v8, v10
3341 ; GFX900-NEXT: v_mov_b32_e32 v9, v11
3342 ; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
3343 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
3344 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3345 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3347 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_1:
3349 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3350 ; GFX90A-NEXT: ;;#ASMSTART
3351 ; GFX90A-NEXT: ; def v[0:5]
3352 ; GFX90A-NEXT: ;;#ASMEND
3353 ; GFX90A-NEXT: ;;#ASMSTART
3354 ; GFX90A-NEXT: ; def v[6:11]
3355 ; GFX90A-NEXT: ;;#ASMEND
3356 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
3357 ; GFX90A-NEXT: v_mov_b32_e32 v0, v4
3358 ; GFX90A-NEXT: v_mov_b32_e32 v1, v5
3359 ; GFX90A-NEXT: v_mov_b32_e32 v8, v10
3360 ; GFX90A-NEXT: v_mov_b32_e32 v9, v11
3361 ; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
3362 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
3363 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3364 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3366 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_1:
3368 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3369 ; GFX940-NEXT: ;;#ASMSTART
3370 ; GFX940-NEXT: ; def v[0:5]
3371 ; GFX940-NEXT: ;;#ASMEND
3372 ; GFX940-NEXT: ;;#ASMSTART
3373 ; GFX940-NEXT: ; def v[6:11]
3374 ; GFX940-NEXT: ;;#ASMEND
3375 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
3376 ; GFX940-NEXT: v_mov_b32_e32 v0, v4
3377 ; GFX940-NEXT: v_mov_b32_e32 v1, v5
3378 ; GFX940-NEXT: v_mov_b32_e32 v8, v10
3379 ; GFX940-NEXT: v_mov_b32_e32 v9, v11
3380 ; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1
3381 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
3382 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3383 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3384 %vec0 = call <3 x i64> asm "; def $0", "=v"()
3385 %vec1 = call <3 x i64> asm "; def $0", "=v"()
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1, so the result is
; { %vec1[2], %vec1[2], %vec0[2], %vec0[1] }.
3386 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 1>
3387 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Codegen test: shuffle two inline-asm-defined <3 x i64> vectors into a
; <4 x i64> using mask <5, 5, 3, 1> and store the result to %ptr.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py.
3391 define void @v_shuffle_v4i64_v3i64__5_5_3_1(ptr addrspace(1) inreg %ptr) {
3392 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_1:
3394 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3395 ; GFX900-NEXT: ;;#ASMSTART
3396 ; GFX900-NEXT: ; def v[0:5]
3397 ; GFX900-NEXT: ;;#ASMEND
3398 ; GFX900-NEXT: ;;#ASMSTART
3399 ; GFX900-NEXT: ; def v[4:9]
3400 ; GFX900-NEXT: ;;#ASMEND
3401 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
3402 ; GFX900-NEXT: v_mov_b32_e32 v6, v2
3403 ; GFX900-NEXT: v_mov_b32_e32 v7, v3
3404 ; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
3405 ; GFX900-NEXT: s_nop 0
3406 ; GFX900-NEXT: v_mov_b32_e32 v6, v8
3407 ; GFX900-NEXT: v_mov_b32_e32 v7, v9
3408 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
3409 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3410 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3412 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_1:
3414 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3415 ; GFX90A-NEXT: ;;#ASMSTART
3416 ; GFX90A-NEXT: ; def v[0:5]
3417 ; GFX90A-NEXT: ;;#ASMEND
3418 ; GFX90A-NEXT: ;;#ASMSTART
3419 ; GFX90A-NEXT: ; def v[4:9]
3420 ; GFX90A-NEXT: ;;#ASMEND
3421 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
3422 ; GFX90A-NEXT: v_mov_b32_e32 v6, v2
3423 ; GFX90A-NEXT: v_mov_b32_e32 v7, v3
3424 ; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
3425 ; GFX90A-NEXT: s_nop 0
3426 ; GFX90A-NEXT: v_mov_b32_e32 v6, v8
3427 ; GFX90A-NEXT: v_mov_b32_e32 v7, v9
3428 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
3429 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3430 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3432 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_1:
3434 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3435 ; GFX940-NEXT: ;;#ASMSTART
3436 ; GFX940-NEXT: ; def v[0:5]
3437 ; GFX940-NEXT: ;;#ASMEND
3438 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
3439 ; GFX940-NEXT: ;;#ASMSTART
3440 ; GFX940-NEXT: ; def v[4:9]
3441 ; GFX940-NEXT: ;;#ASMEND
3442 ; GFX940-NEXT: s_nop 0
3443 ; GFX940-NEXT: v_mov_b32_e32 v6, v2
3444 ; GFX940-NEXT: v_mov_b32_e32 v7, v3
3445 ; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1
3446 ; GFX940-NEXT: s_nop 1
3447 ; GFX940-NEXT: v_mov_b32_e32 v6, v8
3448 ; GFX940-NEXT: v_mov_b32_e32 v7, v9
3449 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
3450 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3451 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3452 %vec0 = call <3 x i64> asm "; def $0", "=v"()
3453 %vec1 = call <3 x i64> asm "; def $0", "=v"()
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1, so the result is
; { %vec1[2], %vec1[2], %vec1[0], %vec0[1] }.
3454 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 1>
3455 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Codegen test: shuffle two inline-asm-defined <3 x i64> vectors into a
; <4 x i64> using mask <5, 5, 4, 1> and store the result to %ptr.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py.
3459 define void @v_shuffle_v4i64_v3i64__5_5_4_1(ptr addrspace(1) inreg %ptr) {
3460 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_1:
3462 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3463 ; GFX900-NEXT: ;;#ASMSTART
3464 ; GFX900-NEXT: ; def v[0:5]
3465 ; GFX900-NEXT: ;;#ASMEND
3466 ; GFX900-NEXT: ;;#ASMSTART
3467 ; GFX900-NEXT: ; def v[4:9]
3468 ; GFX900-NEXT: ;;#ASMEND
3469 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
3470 ; GFX900-NEXT: v_mov_b32_e32 v0, v6
3471 ; GFX900-NEXT: v_mov_b32_e32 v1, v7
3472 ; GFX900-NEXT: v_mov_b32_e32 v6, v8
3473 ; GFX900-NEXT: v_mov_b32_e32 v7, v9
3474 ; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
3475 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
3476 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3477 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3479 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_1:
3481 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3482 ; GFX90A-NEXT: ;;#ASMSTART
3483 ; GFX90A-NEXT: ; def v[0:5]
3484 ; GFX90A-NEXT: ;;#ASMEND
3485 ; GFX90A-NEXT: ;;#ASMSTART
3486 ; GFX90A-NEXT: ; def v[4:9]
3487 ; GFX90A-NEXT: ;;#ASMEND
3488 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
3489 ; GFX90A-NEXT: v_mov_b32_e32 v0, v6
3490 ; GFX90A-NEXT: v_mov_b32_e32 v1, v7
3491 ; GFX90A-NEXT: v_mov_b32_e32 v6, v8
3492 ; GFX90A-NEXT: v_mov_b32_e32 v7, v9
3493 ; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
3494 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
3495 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3496 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3498 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_1:
3500 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3501 ; GFX940-NEXT: ;;#ASMSTART
3502 ; GFX940-NEXT: ; def v[0:5]
3503 ; GFX940-NEXT: ;;#ASMEND
3504 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
3505 ; GFX940-NEXT: ;;#ASMSTART
3506 ; GFX940-NEXT: ; def v[4:9]
3507 ; GFX940-NEXT: ;;#ASMEND
3508 ; GFX940-NEXT: s_nop 0
3509 ; GFX940-NEXT: v_mov_b32_e32 v0, v6
3510 ; GFX940-NEXT: v_mov_b32_e32 v1, v7
3511 ; GFX940-NEXT: v_mov_b32_e32 v6, v8
3512 ; GFX940-NEXT: v_mov_b32_e32 v7, v9
3513 ; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1
3514 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
3515 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3516 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3517 %vec0 = call <3 x i64> asm "; def $0", "=v"()
3518 %vec1 = call <3 x i64> asm "; def $0", "=v"()
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1, so the result is
; { %vec1[2], %vec1[2], %vec1[1], %vec0[1] }.
3519 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 1>
3520 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Codegen test: shuffle one inline-asm-defined <3 x i64> vector (second
; operand poison) into a <4 x i64> using mask <poison, 2, 2, 2> and store
; the result to %ptr.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py.
3524 define void @v_shuffle_v4i64_v3i64__u_2_2_2(ptr addrspace(1) inreg %ptr) {
3525 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__u_2_2_2:
3527 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3528 ; GFX900-NEXT: ;;#ASMSTART
3529 ; GFX900-NEXT: ; def v[0:5]
3530 ; GFX900-NEXT: ;;#ASMEND
3531 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
3532 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
3533 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
3534 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
3535 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
3536 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3537 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3539 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__u_2_2_2:
3541 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3542 ; GFX90A-NEXT: ;;#ASMSTART
3543 ; GFX90A-NEXT: ; def v[0:5]
3544 ; GFX90A-NEXT: ;;#ASMEND
3545 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
3546 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
3547 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
3548 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
3549 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
3550 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3551 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3553 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_2_2_2:
3555 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3556 ; GFX940-NEXT: ;;#ASMSTART
3557 ; GFX940-NEXT: ; def v[0:5]
3558 ; GFX940-NEXT: ;;#ASMEND
3559 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
3560 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
3561 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
3562 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
3563 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
3564 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3565 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3566 %vec0 = call <3 x i64> asm "; def $0", "=v"()
; Lane 0 is poison (unconstrained); lanes 1-3 all take %vec0[2].
3567 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
3568 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Codegen test: shuffle one inline-asm-defined <3 x i64> vector (second
; operand poison) into a <4 x i64> using mask <0, 2, 2, 2> and store the
; result to %ptr.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py.
3572 define void @v_shuffle_v4i64_v3i64__0_2_2_2(ptr addrspace(1) inreg %ptr) {
3573 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_2_2_2:
3575 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3576 ; GFX900-NEXT: ;;#ASMSTART
3577 ; GFX900-NEXT: ; def v[0:5]
3578 ; GFX900-NEXT: ;;#ASMEND
3579 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
3580 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
3581 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
3582 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
3583 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
3584 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3585 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3587 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_2_2_2:
3589 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3590 ; GFX90A-NEXT: ;;#ASMSTART
3591 ; GFX90A-NEXT: ; def v[0:5]
3592 ; GFX90A-NEXT: ;;#ASMEND
3593 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
3594 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
3595 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
3596 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
3597 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
3598 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3599 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3601 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_2_2_2:
3603 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3604 ; GFX940-NEXT: ;;#ASMSTART
3605 ; GFX940-NEXT: ; def v[0:5]
3606 ; GFX940-NEXT: ;;#ASMEND
3607 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
3608 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
3609 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
3610 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
3611 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
3612 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3613 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3614 %vec0 = call <3 x i64> asm "; def $0", "=v"()
; Lane 0 takes %vec0[0]; lanes 1-3 all take %vec0[2].
3615 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
3616 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Codegen test: shuffle one inline-asm-defined <3 x i64> vector (second
; operand poison) into a <4 x i64> using mask <1, 2, 2, 2> and store the
; result to %ptr.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py.
3620 define void @v_shuffle_v4i64_v3i64__1_2_2_2(ptr addrspace(1) inreg %ptr) {
3621 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_2_2_2:
3623 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3624 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
3625 ; GFX900-NEXT: ;;#ASMSTART
3626 ; GFX900-NEXT: ; def v[0:5]
3627 ; GFX900-NEXT: ;;#ASMEND
3628 ; GFX900-NEXT: v_mov_b32_e32 v6, v4
3629 ; GFX900-NEXT: v_mov_b32_e32 v7, v5
3630 ; GFX900-NEXT: v_mov_b32_e32 v8, v4
3631 ; GFX900-NEXT: v_mov_b32_e32 v9, v5
3632 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
3633 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
3634 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3635 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3637 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_2_2_2:
3639 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3640 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
3641 ; GFX90A-NEXT: ;;#ASMSTART
3642 ; GFX90A-NEXT: ; def v[0:5]
3643 ; GFX90A-NEXT: ;;#ASMEND
3644 ; GFX90A-NEXT: v_mov_b32_e32 v6, v4
3645 ; GFX90A-NEXT: v_mov_b32_e32 v7, v5
3646 ; GFX90A-NEXT: v_mov_b32_e32 v8, v4
3647 ; GFX90A-NEXT: v_mov_b32_e32 v9, v5
3648 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
3649 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
3650 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3651 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3653 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_2_2_2:
3655 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3656 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
3657 ; GFX940-NEXT: ;;#ASMSTART
3658 ; GFX940-NEXT: ; def v[0:5]
3659 ; GFX940-NEXT: ;;#ASMEND
3660 ; GFX940-NEXT: s_nop 0
3661 ; GFX940-NEXT: v_mov_b32_e32 v6, v4
3662 ; GFX940-NEXT: v_mov_b32_e32 v7, v5
3663 ; GFX940-NEXT: v_mov_b32_e32 v8, v4
3664 ; GFX940-NEXT: v_mov_b32_e32 v9, v5
3665 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
3666 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1
3667 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3668 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3669 %vec0 = call <3 x i64> asm "; def $0", "=v"()
; Lane 0 takes %vec0[1]; lanes 1-3 all take %vec0[2].
3670 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
3671 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Codegen test: shuffle one inline-asm-defined <3 x i64> vector (second
; operand poison) into a <4 x i64> using mask <2, 2, 2, 2> and store the
; result to %ptr.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py.
3675 define void @v_shuffle_v4i64_v3i64__2_2_2_2(ptr addrspace(1) inreg %ptr) {
3676 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_2_2_2:
3678 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3679 ; GFX900-NEXT: ;;#ASMSTART
3680 ; GFX900-NEXT: ; def v[0:5]
3681 ; GFX900-NEXT: ;;#ASMEND
3682 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
3683 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
3684 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
3685 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
3686 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
3687 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3688 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3690 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_2_2_2:
3692 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3693 ; GFX90A-NEXT: ;;#ASMSTART
3694 ; GFX90A-NEXT: ; def v[0:5]
3695 ; GFX90A-NEXT: ;;#ASMEND
3696 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
3697 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
3698 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
3699 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
3700 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
3701 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3702 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3704 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_2_2_2:
3706 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3707 ; GFX940-NEXT: ;;#ASMSTART
3708 ; GFX940-NEXT: ; def v[0:5]
3709 ; GFX940-NEXT: ;;#ASMEND
3710 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
3711 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
3712 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
3713 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
3714 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
3715 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3716 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3717 %vec0 = call <3 x i64> asm "; def $0", "=v"()
; All four lanes take %vec0[2] (a splat of the last source element).
3718 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
3719 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Codegen test: shuffle one inline-asm-defined <3 x i64> vector (second
; operand poison) into a <4 x i64> using mask <3, 2, 2, 2> and store the
; result to %ptr.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py.
3723 define void @v_shuffle_v4i64_v3i64__3_2_2_2(ptr addrspace(1) inreg %ptr) {
3724 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__3_2_2_2:
3726 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3727 ; GFX900-NEXT: ;;#ASMSTART
3728 ; GFX900-NEXT: ; def v[0:5]
3729 ; GFX900-NEXT: ;;#ASMEND
3730 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
3731 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
3732 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
3733 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
3734 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
3735 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3736 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3738 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__3_2_2_2:
3740 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3741 ; GFX90A-NEXT: ;;#ASMSTART
3742 ; GFX90A-NEXT: ; def v[0:5]
3743 ; GFX90A-NEXT: ;;#ASMEND
3744 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
3745 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
3746 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
3747 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
3748 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
3749 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3750 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3752 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_2_2_2:
3754 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3755 ; GFX940-NEXT: ;;#ASMSTART
3756 ; GFX940-NEXT: ; def v[0:5]
3757 ; GFX940-NEXT: ;;#ASMEND
3758 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
3759 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
3760 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
3761 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
3762 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
3763 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3764 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3765 %vec0 = call <3 x i64> asm "; def $0", "=v"()
; Index 3 selects element 0 of the second operand, which is poison here, so
; lane 0 is unconstrained; lanes 1-3 all take %vec0[2].
3766 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
3767 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Codegen test: shuffle two inline-asm-defined <3 x i64> vectors into a
; <4 x i64> using mask <4, 2, 2, 2> and store the result to %ptr.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py.
3771 define void @v_shuffle_v4i64_v3i64__4_2_2_2(ptr addrspace(1) inreg %ptr) {
3772 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_2_2_2:
3774 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3775 ; GFX900-NEXT: ;;#ASMSTART
3776 ; GFX900-NEXT: ; def v[0:5]
3777 ; GFX900-NEXT: ;;#ASMEND
3778 ; GFX900-NEXT: ;;#ASMSTART
3779 ; GFX900-NEXT: ; def v[6:11]
3780 ; GFX900-NEXT: ;;#ASMEND
3781 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
3782 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
3783 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
3784 ; GFX900-NEXT: v_mov_b32_e32 v10, v4
3785 ; GFX900-NEXT: v_mov_b32_e32 v11, v5
3786 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3787 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
3788 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3789 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3791 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_2_2_2:
3793 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3794 ; GFX90A-NEXT: ;;#ASMSTART
3795 ; GFX90A-NEXT: ; def v[0:5]
3796 ; GFX90A-NEXT: ;;#ASMEND
3797 ; GFX90A-NEXT: ;;#ASMSTART
3798 ; GFX90A-NEXT: ; def v[6:11]
3799 ; GFX90A-NEXT: ;;#ASMEND
3800 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
3801 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
3802 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
3803 ; GFX90A-NEXT: v_mov_b32_e32 v10, v4
3804 ; GFX90A-NEXT: v_mov_b32_e32 v11, v5
3805 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3806 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
3807 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3808 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3810 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_2_2_2:
3812 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3813 ; GFX940-NEXT: ;;#ASMSTART
3814 ; GFX940-NEXT: ; def v[0:5]
3815 ; GFX940-NEXT: ;;#ASMEND
3816 ; GFX940-NEXT: ;;#ASMSTART
3817 ; GFX940-NEXT: ; def v[6:11]
3818 ; GFX940-NEXT: ;;#ASMEND
3819 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
3820 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
3821 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
3822 ; GFX940-NEXT: v_mov_b32_e32 v10, v4
3823 ; GFX940-NEXT: v_mov_b32_e32 v11, v5
3824 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
3825 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
3826 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3827 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3828 %vec0 = call <3 x i64> asm "; def $0", "=v"()
3829 %vec1 = call <3 x i64> asm "; def $0", "=v"()
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1, so the result is
; { %vec1[1], %vec0[2], %vec0[2], %vec0[2] }.
3830 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
3831 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Codegen test: shuffle two inline-asm-defined <3 x i64> vectors into a
; <4 x i64> using mask <5, 2, 2, 2> and store the result to %ptr.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py.
3835 define void @v_shuffle_v4i64_v3i64__5_2_2_2(ptr addrspace(1) inreg %ptr) {
3836 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_2_2:
3838 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3839 ; GFX900-NEXT: ;;#ASMSTART
3840 ; GFX900-NEXT: ; def v[0:5]
3841 ; GFX900-NEXT: ;;#ASMEND
3842 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
3843 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
3844 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
3845 ; GFX900-NEXT: ;;#ASMSTART
3846 ; GFX900-NEXT: ; def v[6:11]
3847 ; GFX900-NEXT: ;;#ASMEND
3848 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3849 ; GFX900-NEXT: s_nop 0
3850 ; GFX900-NEXT: v_mov_b32_e32 v2, v10
3851 ; GFX900-NEXT: v_mov_b32_e32 v3, v11
3852 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
3853 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3854 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3856 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_2_2:
3858 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3859 ; GFX90A-NEXT: ;;#ASMSTART
3860 ; GFX90A-NEXT: ; def v[0:5]
3861 ; GFX90A-NEXT: ;;#ASMEND
3862 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
3863 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
3864 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
3865 ; GFX90A-NEXT: ;;#ASMSTART
3866 ; GFX90A-NEXT: ; def v[6:11]
3867 ; GFX90A-NEXT: ;;#ASMEND
3868 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3869 ; GFX90A-NEXT: s_nop 0
3870 ; GFX90A-NEXT: v_mov_b32_e32 v2, v10
3871 ; GFX90A-NEXT: v_mov_b32_e32 v3, v11
3872 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
3873 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3874 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3876 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_2_2:
3878 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3879 ; GFX940-NEXT: ;;#ASMSTART
3880 ; GFX940-NEXT: ; def v[0:5]
3881 ; GFX940-NEXT: ;;#ASMEND
3882 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
3883 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
3884 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
3885 ; GFX940-NEXT: ;;#ASMSTART
3886 ; GFX940-NEXT: ; def v[6:11]
3887 ; GFX940-NEXT: ;;#ASMEND
3888 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
3889 ; GFX940-NEXT: s_nop 1
3890 ; GFX940-NEXT: v_mov_b32_e32 v2, v10
3891 ; GFX940-NEXT: v_mov_b32_e32 v3, v11
3892 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
3893 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3894 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3895 %vec0 = call <3 x i64> asm "; def $0", "=v"()
3896 %vec1 = call <3 x i64> asm "; def $0", "=v"()
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1, so the result is
; { %vec1[2], %vec0[2], %vec0[2], %vec0[2] }.
3897 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
3898 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Codegen test: shuffle two inline-asm-defined <3 x i64> vectors into a
; <4 x i64> using mask <5, poison, 2, 2> and store the result to %ptr.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py.
3902 define void @v_shuffle_v4i64_v3i64__5_u_2_2(ptr addrspace(1) inreg %ptr) {
3903 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_2_2:
3905 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3906 ; GFX900-NEXT: ;;#ASMSTART
3907 ; GFX900-NEXT: ; def v[0:5]
3908 ; GFX900-NEXT: ;;#ASMEND
3909 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
3910 ; GFX900-NEXT: ;;#ASMSTART
3911 ; GFX900-NEXT: ; def v[6:11]
3912 ; GFX900-NEXT: ;;#ASMEND
3913 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
3914 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
3915 ; GFX900-NEXT: v_mov_b32_e32 v0, v10
3916 ; GFX900-NEXT: v_mov_b32_e32 v1, v11
3917 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3918 ; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
3919 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3920 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3922 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_2_2:
3924 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3925 ; GFX90A-NEXT: ;;#ASMSTART
3926 ; GFX90A-NEXT: ; def v[0:5]
3927 ; GFX90A-NEXT: ;;#ASMEND
3928 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
3929 ; GFX90A-NEXT: ;;#ASMSTART
3930 ; GFX90A-NEXT: ; def v[6:11]
3931 ; GFX90A-NEXT: ;;#ASMEND
3932 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
3933 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
3934 ; GFX90A-NEXT: v_mov_b32_e32 v0, v10
3935 ; GFX90A-NEXT: v_mov_b32_e32 v1, v11
3936 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3937 ; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
3938 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3939 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3941 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_2_2:
3943 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3944 ; GFX940-NEXT: ;;#ASMSTART
3945 ; GFX940-NEXT: ; def v[0:5]
3946 ; GFX940-NEXT: ;;#ASMEND
3947 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
3948 ; GFX940-NEXT: ;;#ASMSTART
3949 ; GFX940-NEXT: ; def v[6:11]
3950 ; GFX940-NEXT: ;;#ASMEND
3951 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
3952 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
3953 ; GFX940-NEXT: v_mov_b32_e32 v0, v10
3954 ; GFX940-NEXT: v_mov_b32_e32 v1, v11
3955 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
3956 ; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1
3957 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3958 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3959 %vec0 = call <3 x i64> asm "; def $0", "=v"()
3960 %vec1 = call <3 x i64> asm "; def $0", "=v"()
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1: lane 0 takes
; %vec1[2], lane 1 is poison (unconstrained), lanes 2-3 take %vec0[2].
3961 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 2, i32 2>
3962 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Codegen test: shuffle two inline-asm-defined <3 x i64> vectors into a
; <4 x i64> using mask <5, 0, 2, 2> and store the result to %ptr.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py.
3966 define void @v_shuffle_v4i64_v3i64__5_0_2_2(ptr addrspace(1) inreg %ptr) {
3967 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_2_2:
3969 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3970 ; GFX900-NEXT: ;;#ASMSTART
3971 ; GFX900-NEXT: ; def v[0:5]
3972 ; GFX900-NEXT: ;;#ASMEND
3973 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
3974 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
3975 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
3976 ; GFX900-NEXT: ;;#ASMSTART
3977 ; GFX900-NEXT: ; def v[6:11]
3978 ; GFX900-NEXT: ;;#ASMEND
3979 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3980 ; GFX900-NEXT: s_nop 0
3981 ; GFX900-NEXT: v_mov_b32_e32 v2, v10
3982 ; GFX900-NEXT: v_mov_b32_e32 v3, v11
3983 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
3984 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
3985 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
3986 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3987 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3989 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_2_2:
3991 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3992 ; GFX90A-NEXT: ;;#ASMSTART
3993 ; GFX90A-NEXT: ; def v[0:5]
3994 ; GFX90A-NEXT: ;;#ASMEND
3995 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
3996 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
3997 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
3998 ; GFX90A-NEXT: ;;#ASMSTART
3999 ; GFX90A-NEXT: ; def v[6:11]
4000 ; GFX90A-NEXT: ;;#ASMEND
4001 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4002 ; GFX90A-NEXT: s_nop 0
4003 ; GFX90A-NEXT: v_mov_b32_e32 v2, v10
4004 ; GFX90A-NEXT: v_mov_b32_e32 v3, v11
4005 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
4006 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
4007 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
4008 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4009 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4011 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_2_2:
4013 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4014 ; GFX940-NEXT: ;;#ASMSTART
4015 ; GFX940-NEXT: ; def v[0:5]
4016 ; GFX940-NEXT: ;;#ASMEND
4017 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
4018 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
4019 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
4020 ; GFX940-NEXT: ;;#ASMSTART
4021 ; GFX940-NEXT: ; def v[6:11]
4022 ; GFX940-NEXT: ;;#ASMEND
4023 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
4024 ; GFX940-NEXT: s_nop 1
4025 ; GFX940-NEXT: v_mov_b32_e32 v2, v10
4026 ; GFX940-NEXT: v_mov_b32_e32 v3, v11
4027 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
4028 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
4029 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
4030 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4031 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4032 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4033 %vec1 = call <3 x i64> asm "; def $0", "=v"()
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1, so the result is
; { %vec1[2], %vec0[0], %vec0[2], %vec0[2] }.
4034 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 2, i32 2>
4035 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Codegen test: shuffle two inline-asm-defined <3 x i64> vectors into a
; <4 x i64> using mask <5, 1, 2, 2> and store the result to %ptr.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py.
4039 define void @v_shuffle_v4i64_v3i64__5_1_2_2(ptr addrspace(1) inreg %ptr) {
4040 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_2_2:
4042 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4043 ; GFX900-NEXT: ;;#ASMSTART
4044 ; GFX900-NEXT: ; def v[0:5]
4045 ; GFX900-NEXT: ;;#ASMEND
4046 ; GFX900-NEXT: ;;#ASMSTART
4047 ; GFX900-NEXT: ; def v[6:11]
4048 ; GFX900-NEXT: ;;#ASMEND
4049 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
4050 ; GFX900-NEXT: v_mov_b32_e32 v6, v4
4051 ; GFX900-NEXT: v_mov_b32_e32 v7, v5
4052 ; GFX900-NEXT: v_mov_b32_e32 v8, v4
4053 ; GFX900-NEXT: v_mov_b32_e32 v9, v5
4054 ; GFX900-NEXT: v_mov_b32_e32 v0, v10
4055 ; GFX900-NEXT: v_mov_b32_e32 v1, v11
4056 ; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
4057 ; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
4058 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4059 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4061 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_2_2:
4063 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4064 ; GFX90A-NEXT: ;;#ASMSTART
4065 ; GFX90A-NEXT: ; def v[0:5]
4066 ; GFX90A-NEXT: ;;#ASMEND
4067 ; GFX90A-NEXT: ;;#ASMSTART
4068 ; GFX90A-NEXT: ; def v[6:11]
4069 ; GFX90A-NEXT: ;;#ASMEND
4070 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
4071 ; GFX90A-NEXT: v_mov_b32_e32 v6, v4
4072 ; GFX90A-NEXT: v_mov_b32_e32 v7, v5
4073 ; GFX90A-NEXT: v_mov_b32_e32 v8, v4
4074 ; GFX90A-NEXT: v_mov_b32_e32 v9, v5
4075 ; GFX90A-NEXT: v_mov_b32_e32 v0, v10
4076 ; GFX90A-NEXT: v_mov_b32_e32 v1, v11
4077 ; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
4078 ; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
4079 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4080 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4082 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_2_2:
4084 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4085 ; GFX940-NEXT: ;;#ASMSTART
4086 ; GFX940-NEXT: ; def v[0:5]
4087 ; GFX940-NEXT: ;;#ASMEND
4088 ; GFX940-NEXT: ;;#ASMSTART
4089 ; GFX940-NEXT: ; def v[6:11]
4090 ; GFX940-NEXT: ;;#ASMEND
4091 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
4092 ; GFX940-NEXT: v_mov_b32_e32 v6, v4
4093 ; GFX940-NEXT: v_mov_b32_e32 v7, v5
4094 ; GFX940-NEXT: v_mov_b32_e32 v8, v4
4095 ; GFX940-NEXT: v_mov_b32_e32 v9, v5
4096 ; GFX940-NEXT: v_mov_b32_e32 v0, v10
4097 ; GFX940-NEXT: v_mov_b32_e32 v1, v11
4098 ; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1
4099 ; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1
4100 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4101 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4102 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4103 %vec1 = call <3 x i64> asm "; def $0", "=v"()
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1, so the result is
; { %vec1[2], %vec0[1], %vec0[2], %vec0[2] }.
4104 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 2, i32 2>
4105 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 3, 2, 2> over two inline-asm-defined <3 x i64> values.
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1, so the stored <4 x i64>
; is { %vec1[2], %vec1[0], %vec0[2], %vec0[2] }.
; The check lines below are autogenerated (see the NOTE at the top of the file).
4109 define void @v_shuffle_v4i64_v3i64__5_3_2_2(ptr addrspace(1) inreg %ptr) {
4110 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_2_2:
4112 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4113 ; GFX900-NEXT: ;;#ASMSTART
4114 ; GFX900-NEXT: ; def v[0:5]
4115 ; GFX900-NEXT: ;;#ASMEND
4116 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
4117 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
4118 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
4119 ; GFX900-NEXT: ;;#ASMSTART
4120 ; GFX900-NEXT: ; def v[6:11]
4121 ; GFX900-NEXT: ;;#ASMEND
4122 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4123 ; GFX900-NEXT: v_mov_b32_e32 v0, v10
4124 ; GFX900-NEXT: v_mov_b32_e32 v1, v11
4125 ; GFX900-NEXT: v_mov_b32_e32 v2, v6
4126 ; GFX900-NEXT: v_mov_b32_e32 v3, v7
4127 ; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
4128 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4129 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4131 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_2_2:
4133 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4134 ; GFX90A-NEXT: ;;#ASMSTART
4135 ; GFX90A-NEXT: ; def v[0:5]
4136 ; GFX90A-NEXT: ;;#ASMEND
4137 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
4138 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
4139 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
4140 ; GFX90A-NEXT: ;;#ASMSTART
4141 ; GFX90A-NEXT: ; def v[6:11]
4142 ; GFX90A-NEXT: ;;#ASMEND
4143 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4144 ; GFX90A-NEXT: v_mov_b32_e32 v0, v10
4145 ; GFX90A-NEXT: v_mov_b32_e32 v1, v11
4146 ; GFX90A-NEXT: v_mov_b32_e32 v2, v6
4147 ; GFX90A-NEXT: v_mov_b32_e32 v3, v7
4148 ; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
4149 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4150 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4152 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_2_2:
4154 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4155 ; GFX940-NEXT: ;;#ASMSTART
4156 ; GFX940-NEXT: ; def v[0:5]
4157 ; GFX940-NEXT: ;;#ASMEND
4158 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
4159 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
4160 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
4161 ; GFX940-NEXT: ;;#ASMSTART
4162 ; GFX940-NEXT: ; def v[6:11]
4163 ; GFX940-NEXT: ;;#ASMEND
4164 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
4165 ; GFX940-NEXT: v_mov_b32_e32 v0, v10
4166 ; GFX940-NEXT: v_mov_b32_e32 v1, v11
4167 ; GFX940-NEXT: v_mov_b32_e32 v2, v6
4168 ; GFX940-NEXT: v_mov_b32_e32 v3, v7
4169 ; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1
4170 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4171 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4172 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4173 %vec1 = call <3 x i64> asm "; def $0", "=v"()
4174 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 2, i32 2>
4175 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 4, 2, 2> over two inline-asm-defined <3 x i64> values.
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1, so the stored <4 x i64>
; is { %vec1[2], %vec1[1], %vec0[2], %vec0[2] }.
; The check lines below are autogenerated (see the NOTE at the top of the file).
4179 define void @v_shuffle_v4i64_v3i64__5_4_2_2(ptr addrspace(1) inreg %ptr) {
4180 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_2_2:
4182 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4183 ; GFX900-NEXT: ;;#ASMSTART
4184 ; GFX900-NEXT: ; def v[0:5]
4185 ; GFX900-NEXT: ;;#ASMEND
4186 ; GFX900-NEXT: ;;#ASMSTART
4187 ; GFX900-NEXT: ; def v[6:11]
4188 ; GFX900-NEXT: ;;#ASMEND
4189 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
4190 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
4191 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
4192 ; GFX900-NEXT: v_mov_b32_e32 v6, v10
4193 ; GFX900-NEXT: v_mov_b32_e32 v7, v11
4194 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4195 ; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
4196 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4197 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4199 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_2_2:
4201 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4202 ; GFX90A-NEXT: ;;#ASMSTART
4203 ; GFX90A-NEXT: ; def v[0:5]
4204 ; GFX90A-NEXT: ;;#ASMEND
4205 ; GFX90A-NEXT: ;;#ASMSTART
4206 ; GFX90A-NEXT: ; def v[6:11]
4207 ; GFX90A-NEXT: ;;#ASMEND
4208 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
4209 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
4210 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
4211 ; GFX90A-NEXT: v_mov_b32_e32 v6, v10
4212 ; GFX90A-NEXT: v_mov_b32_e32 v7, v11
4213 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4214 ; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
4215 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4216 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4218 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_2_2:
4220 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4221 ; GFX940-NEXT: ;;#ASMSTART
4222 ; GFX940-NEXT: ; def v[0:5]
4223 ; GFX940-NEXT: ;;#ASMEND
4224 ; GFX940-NEXT: ;;#ASMSTART
4225 ; GFX940-NEXT: ; def v[6:11]
4226 ; GFX940-NEXT: ;;#ASMEND
4227 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
4228 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
4229 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
4230 ; GFX940-NEXT: v_mov_b32_e32 v6, v10
4231 ; GFX940-NEXT: v_mov_b32_e32 v7, v11
4232 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
4233 ; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1
4234 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4235 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4236 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4237 %vec1 = call <3 x i64> asm "; def $0", "=v"()
4238 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 2, i32 2>
4239 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 5, 2, 2> over two inline-asm-defined <3 x i64> values.
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1, so the stored <4 x i64>
; is { %vec1[2], %vec1[2], %vec0[2], %vec0[2] }.
; The check lines below are autogenerated (see the NOTE at the top of the file).
4243 define void @v_shuffle_v4i64_v3i64__5_5_2_2(ptr addrspace(1) inreg %ptr) {
4244 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_2:
4246 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4247 ; GFX900-NEXT: ;;#ASMSTART
4248 ; GFX900-NEXT: ; def v[0:5]
4249 ; GFX900-NEXT: ;;#ASMEND
4250 ; GFX900-NEXT: ;;#ASMSTART
4251 ; GFX900-NEXT: ; def v[6:11]
4252 ; GFX900-NEXT: ;;#ASMEND
4253 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
4254 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
4255 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
4256 ; GFX900-NEXT: v_mov_b32_e32 v8, v10
4257 ; GFX900-NEXT: v_mov_b32_e32 v9, v11
4258 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4259 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
4260 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4261 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4263 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_2:
4265 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4266 ; GFX90A-NEXT: ;;#ASMSTART
4267 ; GFX90A-NEXT: ; def v[0:5]
4268 ; GFX90A-NEXT: ;;#ASMEND
4269 ; GFX90A-NEXT: ;;#ASMSTART
4270 ; GFX90A-NEXT: ; def v[6:11]
4271 ; GFX90A-NEXT: ;;#ASMEND
4272 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
4273 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
4274 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
4275 ; GFX90A-NEXT: v_mov_b32_e32 v8, v10
4276 ; GFX90A-NEXT: v_mov_b32_e32 v9, v11
4277 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4278 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
4279 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4280 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4282 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_2:
4284 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4285 ; GFX940-NEXT: ;;#ASMSTART
4286 ; GFX940-NEXT: ; def v[0:5]
4287 ; GFX940-NEXT: ;;#ASMEND
4288 ; GFX940-NEXT: ;;#ASMSTART
4289 ; GFX940-NEXT: ; def v[6:11]
4290 ; GFX940-NEXT: ;;#ASMEND
4291 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
4292 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
4293 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
4294 ; GFX940-NEXT: v_mov_b32_e32 v8, v10
4295 ; GFX940-NEXT: v_mov_b32_e32 v9, v11
4296 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
4297 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
4298 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4299 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4300 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4301 %vec1 = call <3 x i64> asm "; def $0", "=v"()
4302 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 2>
4303 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 5, poison, 2> over two inline-asm-defined <3 x i64> values.
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1, so the stored <4 x i64>
; is { %vec1[2], %vec1[2], <poison>, %vec0[2] }; the third lane is unconstrained.
; The check lines below are autogenerated (see the NOTE at the top of the file).
4307 define void @v_shuffle_v4i64_v3i64__5_5_u_2(ptr addrspace(1) inreg %ptr) {
4308 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_2:
4310 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4311 ; GFX900-NEXT: ;;#ASMSTART
4312 ; GFX900-NEXT: ; def v[6:11]
4313 ; GFX900-NEXT: ;;#ASMEND
4314 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
4315 ; GFX900-NEXT: v_mov_b32_e32 v8, v10
4316 ; GFX900-NEXT: v_mov_b32_e32 v9, v11
4317 ; GFX900-NEXT: ;;#ASMSTART
4318 ; GFX900-NEXT: ; def v[0:5]
4319 ; GFX900-NEXT: ;;#ASMEND
4320 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4321 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
4322 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4323 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4325 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_2:
4327 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4328 ; GFX90A-NEXT: ;;#ASMSTART
4329 ; GFX90A-NEXT: ; def v[6:11]
4330 ; GFX90A-NEXT: ;;#ASMEND
4331 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
4332 ; GFX90A-NEXT: v_mov_b32_e32 v8, v10
4333 ; GFX90A-NEXT: v_mov_b32_e32 v9, v11
4334 ; GFX90A-NEXT: ;;#ASMSTART
4335 ; GFX90A-NEXT: ; def v[0:5]
4336 ; GFX90A-NEXT: ;;#ASMEND
4337 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4338 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
4339 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4340 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4342 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_2:
4344 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4345 ; GFX940-NEXT: ;;#ASMSTART
4346 ; GFX940-NEXT: ; def v[6:11]
4347 ; GFX940-NEXT: ;;#ASMEND
4348 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
4349 ; GFX940-NEXT: v_mov_b32_e32 v8, v10
4350 ; GFX940-NEXT: v_mov_b32_e32 v9, v11
4351 ; GFX940-NEXT: ;;#ASMSTART
4352 ; GFX940-NEXT: ; def v[0:5]
4353 ; GFX940-NEXT: ;;#ASMEND
4354 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
4355 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
4356 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4357 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4358 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4359 %vec1 = call <3 x i64> asm "; def $0", "=v"()
4360 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 2>
4361 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 5, 0, 2> over two inline-asm-defined <3 x i64> values.
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1, so the stored <4 x i64>
; is { %vec1[2], %vec1[2], %vec0[0], %vec0[2] }.
; The check lines below are autogenerated (see the NOTE at the top of the file).
4365 define void @v_shuffle_v4i64_v3i64__5_5_0_2(ptr addrspace(1) inreg %ptr) {
4366 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_2:
4368 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4369 ; GFX900-NEXT: ;;#ASMSTART
4370 ; GFX900-NEXT: ; def v[0:5]
4371 ; GFX900-NEXT: ;;#ASMEND
4372 ; GFX900-NEXT: ;;#ASMSTART
4373 ; GFX900-NEXT: ; def v[6:11]
4374 ; GFX900-NEXT: ;;#ASMEND
4375 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
4376 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
4377 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
4378 ; GFX900-NEXT: v_mov_b32_e32 v8, v10
4379 ; GFX900-NEXT: v_mov_b32_e32 v9, v11
4380 ; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
4381 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
4382 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4383 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4385 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_2:
4387 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4388 ; GFX90A-NEXT: ;;#ASMSTART
4389 ; GFX90A-NEXT: ; def v[0:5]
4390 ; GFX90A-NEXT: ;;#ASMEND
4391 ; GFX90A-NEXT: ;;#ASMSTART
4392 ; GFX90A-NEXT: ; def v[6:11]
4393 ; GFX90A-NEXT: ;;#ASMEND
4394 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
4395 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
4396 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
4397 ; GFX90A-NEXT: v_mov_b32_e32 v8, v10
4398 ; GFX90A-NEXT: v_mov_b32_e32 v9, v11
4399 ; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
4400 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
4401 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4402 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4404 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_2:
4406 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4407 ; GFX940-NEXT: ;;#ASMSTART
4408 ; GFX940-NEXT: ; def v[0:5]
4409 ; GFX940-NEXT: ;;#ASMEND
4410 ; GFX940-NEXT: ;;#ASMSTART
4411 ; GFX940-NEXT: ; def v[6:11]
4412 ; GFX940-NEXT: ;;#ASMEND
4413 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
4414 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
4415 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
4416 ; GFX940-NEXT: v_mov_b32_e32 v8, v10
4417 ; GFX940-NEXT: v_mov_b32_e32 v9, v11
4418 ; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1
4419 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
4420 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4421 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4422 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4423 %vec1 = call <3 x i64> asm "; def $0", "=v"()
4424 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 2>
4425 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 5, 1, 2> over two inline-asm-defined <3 x i64> values.
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1, so the stored <4 x i64>
; is { %vec1[2], %vec1[2], %vec0[1], %vec0[2] }.
; The check lines below are autogenerated (see the NOTE at the top of the file).
4429 define void @v_shuffle_v4i64_v3i64__5_5_1_2(ptr addrspace(1) inreg %ptr) {
4430 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_2:
4432 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4433 ; GFX900-NEXT: ;;#ASMSTART
4434 ; GFX900-NEXT: ; def v[6:11]
4435 ; GFX900-NEXT: ;;#ASMEND
4436 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
4437 ; GFX900-NEXT: v_mov_b32_e32 v8, v10
4438 ; GFX900-NEXT: v_mov_b32_e32 v9, v11
4439 ; GFX900-NEXT: ;;#ASMSTART
4440 ; GFX900-NEXT: ; def v[0:5]
4441 ; GFX900-NEXT: ;;#ASMEND
4442 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4443 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
4444 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4445 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4447 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_2:
4449 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4450 ; GFX90A-NEXT: ;;#ASMSTART
4451 ; GFX90A-NEXT: ; def v[6:11]
4452 ; GFX90A-NEXT: ;;#ASMEND
4453 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
4454 ; GFX90A-NEXT: v_mov_b32_e32 v8, v10
4455 ; GFX90A-NEXT: v_mov_b32_e32 v9, v11
4456 ; GFX90A-NEXT: ;;#ASMSTART
4457 ; GFX90A-NEXT: ; def v[0:5]
4458 ; GFX90A-NEXT: ;;#ASMEND
4459 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4460 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
4461 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4462 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4464 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_2:
4466 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4467 ; GFX940-NEXT: ;;#ASMSTART
4468 ; GFX940-NEXT: ; def v[6:11]
4469 ; GFX940-NEXT: ;;#ASMEND
4470 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
4471 ; GFX940-NEXT: v_mov_b32_e32 v8, v10
4472 ; GFX940-NEXT: v_mov_b32_e32 v9, v11
4473 ; GFX940-NEXT: ;;#ASMSTART
4474 ; GFX940-NEXT: ; def v[0:5]
4475 ; GFX940-NEXT: ;;#ASMEND
4476 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
4477 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
4478 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4479 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4480 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4481 %vec1 = call <3 x i64> asm "; def $0", "=v"()
4482 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 2>
4483 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 5, 3, 2> over two inline-asm-defined <3 x i64> values.
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1, so the stored <4 x i64>
; is { %vec1[2], %vec1[2], %vec1[0], %vec0[2] }.
; The check lines below are autogenerated (see the NOTE at the top of the file).
4487 define void @v_shuffle_v4i64_v3i64__5_5_3_2(ptr addrspace(1) inreg %ptr) {
4488 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_2:
4490 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4491 ; GFX900-NEXT: ;;#ASMSTART
4492 ; GFX900-NEXT: ; def v[6:11]
4493 ; GFX900-NEXT: ;;#ASMEND
4494 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
4495 ; GFX900-NEXT: ;;#ASMSTART
4496 ; GFX900-NEXT: ; def v[0:5]
4497 ; GFX900-NEXT: ;;#ASMEND
4498 ; GFX900-NEXT: v_mov_b32_e32 v8, v4
4499 ; GFX900-NEXT: v_mov_b32_e32 v9, v5
4500 ; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
4501 ; GFX900-NEXT: s_nop 0
4502 ; GFX900-NEXT: v_mov_b32_e32 v8, v10
4503 ; GFX900-NEXT: v_mov_b32_e32 v9, v11
4504 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
4505 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4506 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4508 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_2:
4510 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4511 ; GFX90A-NEXT: ;;#ASMSTART
4512 ; GFX90A-NEXT: ; def v[6:11]
4513 ; GFX90A-NEXT: ;;#ASMEND
4514 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
4515 ; GFX90A-NEXT: ;;#ASMSTART
4516 ; GFX90A-NEXT: ; def v[0:5]
4517 ; GFX90A-NEXT: ;;#ASMEND
4518 ; GFX90A-NEXT: v_mov_b32_e32 v8, v4
4519 ; GFX90A-NEXT: v_mov_b32_e32 v9, v5
4520 ; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
4521 ; GFX90A-NEXT: s_nop 0
4522 ; GFX90A-NEXT: v_mov_b32_e32 v8, v10
4523 ; GFX90A-NEXT: v_mov_b32_e32 v9, v11
4524 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
4525 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4526 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4528 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_2:
4530 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4531 ; GFX940-NEXT: ;;#ASMSTART
4532 ; GFX940-NEXT: ; def v[6:11]
4533 ; GFX940-NEXT: ;;#ASMEND
4534 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
4535 ; GFX940-NEXT: ;;#ASMSTART
4536 ; GFX940-NEXT: ; def v[0:5]
4537 ; GFX940-NEXT: ;;#ASMEND
4538 ; GFX940-NEXT: s_nop 0
4539 ; GFX940-NEXT: v_mov_b32_e32 v8, v4
4540 ; GFX940-NEXT: v_mov_b32_e32 v9, v5
4541 ; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1
4542 ; GFX940-NEXT: s_nop 1
4543 ; GFX940-NEXT: v_mov_b32_e32 v8, v10
4544 ; GFX940-NEXT: v_mov_b32_e32 v9, v11
4545 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
4546 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4547 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4548 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4549 %vec1 = call <3 x i64> asm "; def $0", "=v"()
4550 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 2>
4551 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 5, 4, 2> over two inline-asm-defined <3 x i64> values.
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1, so the stored <4 x i64>
; is { %vec1[2], %vec1[2], %vec1[1], %vec0[2] }.
; The check lines below are autogenerated (see the NOTE at the top of the file).
4555 define void @v_shuffle_v4i64_v3i64__5_5_4_2(ptr addrspace(1) inreg %ptr) {
4556 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_2:
4558 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4559 ; GFX900-NEXT: ;;#ASMSTART
4560 ; GFX900-NEXT: ; def v[0:5]
4561 ; GFX900-NEXT: ;;#ASMEND
4562 ; GFX900-NEXT: ;;#ASMSTART
4563 ; GFX900-NEXT: ; def v[6:11]
4564 ; GFX900-NEXT: ;;#ASMEND
4565 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
4566 ; GFX900-NEXT: v_mov_b32_e32 v2, v8
4567 ; GFX900-NEXT: v_mov_b32_e32 v3, v9
4568 ; GFX900-NEXT: v_mov_b32_e32 v8, v10
4569 ; GFX900-NEXT: v_mov_b32_e32 v9, v11
4570 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4571 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
4572 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4573 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4575 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_2:
4577 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4578 ; GFX90A-NEXT: ;;#ASMSTART
4579 ; GFX90A-NEXT: ; def v[0:5]
4580 ; GFX90A-NEXT: ;;#ASMEND
4581 ; GFX90A-NEXT: ;;#ASMSTART
4582 ; GFX90A-NEXT: ; def v[6:11]
4583 ; GFX90A-NEXT: ;;#ASMEND
4584 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
4585 ; GFX90A-NEXT: v_mov_b32_e32 v2, v8
4586 ; GFX90A-NEXT: v_mov_b32_e32 v3, v9
4587 ; GFX90A-NEXT: v_mov_b32_e32 v8, v10
4588 ; GFX90A-NEXT: v_mov_b32_e32 v9, v11
4589 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4590 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
4591 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4592 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4594 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_2:
4596 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4597 ; GFX940-NEXT: ;;#ASMSTART
4598 ; GFX940-NEXT: ; def v[0:5]
4599 ; GFX940-NEXT: ;;#ASMEND
4600 ; GFX940-NEXT: ;;#ASMSTART
4601 ; GFX940-NEXT: ; def v[6:11]
4602 ; GFX940-NEXT: ;;#ASMEND
4603 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
4604 ; GFX940-NEXT: v_mov_b32_e32 v2, v8
4605 ; GFX940-NEXT: v_mov_b32_e32 v3, v9
4606 ; GFX940-NEXT: v_mov_b32_e32 v8, v10
4607 ; GFX940-NEXT: v_mov_b32_e32 v9, v11
4608 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
4609 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
4610 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4611 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4612 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4613 %vec1 = call <3 x i64> asm "; def $0", "=v"()
4614 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 2>
4615 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <poison, 3, 3, 3> with a poison second operand.
; Lane 0 is poison and index 3 selects from the poison operand, so no lane reads
; %vec0. The shared expected output for all three targets is only the entry
; waitcnt and the return.
4619 define void @v_shuffle_v4i64_v3i64__u_3_3_3(ptr addrspace(1) inreg %ptr) {
4620 ; GFX9-LABEL: v_shuffle_v4i64_v3i64__u_3_3_3:
4622 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4623 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4624 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4625 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
4626 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <0, 3, 3, 3> with a poison second operand.
; Lane 0 is %vec0[0]; index 3 selects from the poison operand, so lanes 1-3 are
; unconstrained.
; The check lines below are autogenerated (see the NOTE at the top of the file).
4630 define void @v_shuffle_v4i64_v3i64__0_3_3_3(ptr addrspace(1) inreg %ptr) {
4631 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_3_3_3:
4633 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4634 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
4635 ; GFX900-NEXT: ;;#ASMSTART
4636 ; GFX900-NEXT: ; def v[0:5]
4637 ; GFX900-NEXT: ;;#ASMEND
4638 ; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
4639 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
4640 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4641 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4643 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_3_3_3:
4645 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4646 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
4647 ; GFX90A-NEXT: ;;#ASMSTART
4648 ; GFX90A-NEXT: ; def v[0:5]
4649 ; GFX90A-NEXT: ;;#ASMEND
4650 ; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
4651 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
4652 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4653 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4655 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_3_3_3:
4657 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4658 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
4659 ; GFX940-NEXT: ;;#ASMSTART
4660 ; GFX940-NEXT: ; def v[0:5]
4661 ; GFX940-NEXT: ;;#ASMEND
4662 ; GFX940-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1] offset:16 sc0 sc1
4663 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
4664 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4665 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4666 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4667 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
4668 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <1, 3, 3, 3> with a poison second operand.
; Lane 0 is %vec0[1]; index 3 selects from the poison operand, so lanes 1-3 are
; unconstrained. The expected output below contains a single 16-byte store.
; The check lines below are autogenerated (see the NOTE at the top of the file).
4672 define void @v_shuffle_v4i64_v3i64__1_3_3_3(ptr addrspace(1) inreg %ptr) {
4673 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_3_3_3:
4675 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4676 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
4677 ; GFX900-NEXT: ;;#ASMSTART
4678 ; GFX900-NEXT: ; def v[0:5]
4679 ; GFX900-NEXT: ;;#ASMEND
4680 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
4681 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4682 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4684 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_3_3_3:
4686 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4687 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
4688 ; GFX90A-NEXT: ;;#ASMSTART
4689 ; GFX90A-NEXT: ; def v[0:5]
4690 ; GFX90A-NEXT: ;;#ASMEND
4691 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
4692 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4693 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4695 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_3_3_3:
4697 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4698 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
4699 ; GFX940-NEXT: ;;#ASMSTART
4700 ; GFX940-NEXT: ; def v[0:5]
4701 ; GFX940-NEXT: ;;#ASMEND
4702 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
4703 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4704 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4705 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4706 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
4707 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <2, 3, 3, 3> with a poison second operand.
; Lane 0 is %vec0[2]; index 3 selects from the poison operand, so lanes 1-3 are
; unconstrained. The expected output below contains a single 16-byte store.
; The check lines below are autogenerated (see the NOTE at the top of the file).
4711 define void @v_shuffle_v4i64_v3i64__2_3_3_3(ptr addrspace(1) inreg %ptr) {
4712 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_3_3_3:
4714 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4715 ; GFX900-NEXT: ;;#ASMSTART
4716 ; GFX900-NEXT: ; def v[0:5]
4717 ; GFX900-NEXT: ;;#ASMEND
4718 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
4719 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
4720 ; GFX900-NEXT: v_mov_b32_e32 v1, v5
4721 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
4722 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4723 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4725 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_3_3_3:
4727 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4728 ; GFX90A-NEXT: ;;#ASMSTART
4729 ; GFX90A-NEXT: ; def v[0:5]
4730 ; GFX90A-NEXT: ;;#ASMEND
4731 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
4732 ; GFX90A-NEXT: v_mov_b32_e32 v0, v4
4733 ; GFX90A-NEXT: v_mov_b32_e32 v1, v5
4734 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
4735 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4736 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4738 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_3_3_3:
4740 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4741 ; GFX940-NEXT: ;;#ASMSTART
4742 ; GFX940-NEXT: ; def v[0:5]
4743 ; GFX940-NEXT: ;;#ASMEND
4744 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
4745 ; GFX940-NEXT: v_mov_b32_e32 v0, v4
4746 ; GFX940-NEXT: v_mov_b32_e32 v1, v5
4747 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
4748 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4749 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4750 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4751 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
4752 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <3, 3, 3, 3> with a poison second operand.
; Index 3 selects from the poison operand, so no lane reads %vec0. The shared
; expected output for all three targets is only the entry waitcnt and the return.
4756 define void @v_shuffle_v4i64_v3i64__3_3_3_3(ptr addrspace(1) inreg %ptr) {
4757 ; GFX9-LABEL: v_shuffle_v4i64_v3i64__3_3_3_3:
4759 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4760 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4761 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4762 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
4763 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <4, 3, 3, 3> over two inline-asm-defined <3 x i64> values.
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1, so the stored <4 x i64>
; is { %vec1[1], %vec1[0], %vec1[0], %vec1[0] }. No lane reads %vec0, and the
; expected output below contains a single asm def.
; The check lines below are autogenerated (see the NOTE at the top of the file).
4767 define void @v_shuffle_v4i64_v3i64__4_3_3_3(ptr addrspace(1) inreg %ptr) {
4768 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_3_3_3:
4770 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4771 ; GFX900-NEXT: ;;#ASMSTART
4772 ; GFX900-NEXT: ; def v[0:5]
4773 ; GFX900-NEXT: ;;#ASMEND
4774 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
4775 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
4776 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
4777 ; GFX900-NEXT: v_mov_b32_e32 v6, v0
4778 ; GFX900-NEXT: v_mov_b32_e32 v7, v1
4779 ; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
4780 ; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
4781 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4782 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4784 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_3_3_3:
4786 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4787 ; GFX90A-NEXT: ;;#ASMSTART
4788 ; GFX90A-NEXT: ; def v[0:5]
4789 ; GFX90A-NEXT: ;;#ASMEND
4790 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
4791 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
4792 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
4793 ; GFX90A-NEXT: v_mov_b32_e32 v6, v0
4794 ; GFX90A-NEXT: v_mov_b32_e32 v7, v1
4795 ; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
4796 ; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
4797 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4798 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4800 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_3_3_3:
4802 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4803 ; GFX940-NEXT: ;;#ASMSTART
4804 ; GFX940-NEXT: ; def v[0:5]
4805 ; GFX940-NEXT: ;;#ASMEND
4806 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
4807 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
4808 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
4809 ; GFX940-NEXT: v_mov_b32_e32 v6, v0
4810 ; GFX940-NEXT: v_mov_b32_e32 v7, v1
4811 ; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1
4812 ; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
4813 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4814 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4815 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4816 %vec1 = call <3 x i64> asm "; def $0", "=v"()
4817 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
4818 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 3, 3, 3> over two inline-asm-defined <3 x i64> values.
; Mask indices 0-2 pick from %vec0 and 3-5 from %vec1, so the stored <4 x i64>
; is { %vec1[2], %vec1[0], %vec1[0], %vec1[0] }. No lane reads %vec0, and the
; expected output below contains a single asm def.
; The check lines below are autogenerated (see the NOTE at the top of the file).
4822 define void @v_shuffle_v4i64_v3i64__5_3_3_3(ptr addrspace(1) inreg %ptr) {
4823 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_3_3:
4825 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4826 ; GFX900-NEXT: ;;#ASMSTART
4827 ; GFX900-NEXT: ; def v[0:5]
4828 ; GFX900-NEXT: ;;#ASMEND
4829 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
4830 ; GFX900-NEXT: v_mov_b32_e32 v2, v0
4831 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
4832 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
4833 ; GFX900-NEXT: s_nop 0
4834 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
4835 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
4836 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
4837 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
4838 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
4839 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4840 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4842 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_3_3:
4844 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4845 ; GFX90A-NEXT: ;;#ASMSTART
4846 ; GFX90A-NEXT: ; def v[0:5]
4847 ; GFX90A-NEXT: ;;#ASMEND
4848 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
4849 ; GFX90A-NEXT: v_mov_b32_e32 v2, v0
4850 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
4851 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
4852 ; GFX90A-NEXT: s_nop 0
4853 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
4854 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
4855 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
4856 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
4857 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
4858 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4859 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4861 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_3_3:
4863 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4864 ; GFX940-NEXT: ;;#ASMSTART
4865 ; GFX940-NEXT: ; def v[0:5]
4866 ; GFX940-NEXT: ;;#ASMEND
4867 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
4868 ; GFX940-NEXT: v_mov_b32_e32 v2, v0
4869 ; GFX940-NEXT: v_mov_b32_e32 v3, v1
4870 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
4871 ; GFX940-NEXT: s_nop 1
4872 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
4873 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
4874 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
4875 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
4876 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
4877 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4878 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4879 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4880 %vec1 = call <3 x i64> asm "; def $0", "=v"()
4881 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
4882 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Test case for shufflevector of two <3 x i64> operands into <4 x i64> with
; mask <5, poison, 3, 3>. Mask values 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec1[2], unspecified, %vec1[0], %vec1[0] }.
; %vec0 is never selected and the expected output contains a single asm def.
; The result is stored to %ptr; the expected output emits the upper 16 bytes
; (offset 16) first and then the lower 16 bytes.
; The check lines are generated by utils/update_llc_test_checks.py (see the
; first line of this file), so regenerate them rather than editing by hand.
4886 define void @v_shuffle_v4i64_v3i64__5_u_3_3(ptr addrspace(1) inreg %ptr) {
4887 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_3_3:
4889 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4890 ; GFX900-NEXT: ;;#ASMSTART
4891 ; GFX900-NEXT: ; def v[0:5]
4892 ; GFX900-NEXT: ;;#ASMEND
4893 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
4894 ; GFX900-NEXT: v_mov_b32_e32 v2, v0
4895 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
4896 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
4897 ; GFX900-NEXT: s_nop 0
4898 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
4899 ; GFX900-NEXT: v_mov_b32_e32 v1, v5
4900 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
4901 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4902 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4904 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_3_3:
4906 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4907 ; GFX90A-NEXT: ;;#ASMSTART
4908 ; GFX90A-NEXT: ; def v[0:5]
4909 ; GFX90A-NEXT: ;;#ASMEND
4910 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
4911 ; GFX90A-NEXT: v_mov_b32_e32 v2, v0
4912 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
4913 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
4914 ; GFX90A-NEXT: s_nop 0
4915 ; GFX90A-NEXT: v_mov_b32_e32 v0, v4
4916 ; GFX90A-NEXT: v_mov_b32_e32 v1, v5
4917 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
4918 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4919 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4921 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_3_3:
4923 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4924 ; GFX940-NEXT: ;;#ASMSTART
4925 ; GFX940-NEXT: ; def v[0:5]
4926 ; GFX940-NEXT: ;;#ASMEND
4927 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
4928 ; GFX940-NEXT: v_mov_b32_e32 v2, v0
4929 ; GFX940-NEXT: v_mov_b32_e32 v3, v1
4930 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
4931 ; GFX940-NEXT: s_nop 1
4932 ; GFX940-NEXT: v_mov_b32_e32 v0, v4
4933 ; GFX940-NEXT: v_mov_b32_e32 v1, v5
4934 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
4935 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4936 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both operands come from inline asm with a "=v" output constraint.
4937 %vec0 = call <3 x i64> asm "; def $0", "=v"()
4938 %vec1 = call <3 x i64> asm "; def $0", "=v"()
4939 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 3, i32 3>
4940 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Test case for shufflevector of two <3 x i64> operands into <4 x i64> with
; mask <5, 0, 3, 3>. Mask values 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec1[2], %vec0[0], %vec1[0], %vec1[0] }.
; Both operands are selected and the expected output contains two asm defs.
; The result is stored to %ptr; the expected output emits the upper 16 bytes
; (offset 16) first and then the lower 16 bytes.
; The check lines are generated by utils/update_llc_test_checks.py (see the
; first line of this file), so regenerate them rather than editing by hand.
4944 define void @v_shuffle_v4i64_v3i64__5_0_3_3(ptr addrspace(1) inreg %ptr) {
4945 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_3_3:
4947 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4948 ; GFX900-NEXT: ;;#ASMSTART
4949 ; GFX900-NEXT: ; def v[0:5]
4950 ; GFX900-NEXT: ;;#ASMEND
4951 ; GFX900-NEXT: ;;#ASMSTART
4952 ; GFX900-NEXT: ; def v[2:7]
4953 ; GFX900-NEXT: ;;#ASMEND
4954 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
4955 ; GFX900-NEXT: v_mov_b32_e32 v4, v2
4956 ; GFX900-NEXT: v_mov_b32_e32 v5, v3
4957 ; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
4958 ; GFX900-NEXT: s_nop 0
4959 ; GFX900-NEXT: v_mov_b32_e32 v2, v6
4960 ; GFX900-NEXT: v_mov_b32_e32 v3, v7
4961 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
4962 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
4963 ; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
4964 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4965 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4967 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_3_3:
4969 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4970 ; GFX90A-NEXT: ;;#ASMSTART
4971 ; GFX90A-NEXT: ; def v[0:5]
4972 ; GFX90A-NEXT: ;;#ASMEND
4973 ; GFX90A-NEXT: ;;#ASMSTART
4974 ; GFX90A-NEXT: ; def v[2:7]
4975 ; GFX90A-NEXT: ;;#ASMEND
4976 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
4977 ; GFX90A-NEXT: v_mov_b32_e32 v4, v2
4978 ; GFX90A-NEXT: v_mov_b32_e32 v5, v3
4979 ; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
4980 ; GFX90A-NEXT: s_nop 0
4981 ; GFX90A-NEXT: v_mov_b32_e32 v2, v6
4982 ; GFX90A-NEXT: v_mov_b32_e32 v3, v7
4983 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
4984 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
4985 ; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
4986 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4987 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4989 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_3_3:
4991 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4992 ; GFX940-NEXT: ;;#ASMSTART
4993 ; GFX940-NEXT: ; def v[0:5]
4994 ; GFX940-NEXT: ;;#ASMEND
4995 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
4996 ; GFX940-NEXT: ;;#ASMSTART
4997 ; GFX940-NEXT: ; def v[2:7]
4998 ; GFX940-NEXT: ;;#ASMEND
4999 ; GFX940-NEXT: s_nop 0
5000 ; GFX940-NEXT: v_mov_b32_e32 v4, v2
5001 ; GFX940-NEXT: v_mov_b32_e32 v5, v3
5002 ; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1
5003 ; GFX940-NEXT: s_nop 1
5004 ; GFX940-NEXT: v_mov_b32_e32 v2, v6
5005 ; GFX940-NEXT: v_mov_b32_e32 v3, v7
5006 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
5007 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
5008 ; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
5009 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5010 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both operands come from inline asm with a "=v" output constraint.
5011 %vec0 = call <3 x i64> asm "; def $0", "=v"()
5012 %vec1 = call <3 x i64> asm "; def $0", "=v"()
5013 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 3, i32 3>
5014 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Test case for shufflevector of two <3 x i64> operands into <4 x i64> with
; mask <5, 1, 3, 3>. Mask values 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec1[2], %vec0[1], %vec1[0], %vec1[0] }.
; Both operands are selected and the expected output contains two asm defs.
; The result is stored to %ptr; the expected output emits the upper 16 bytes
; (offset 16) first and then the lower 16 bytes.
; The check lines are generated by utils/update_llc_test_checks.py (see the
; first line of this file), so regenerate them rather than editing by hand.
5018 define void @v_shuffle_v4i64_v3i64__5_1_3_3(ptr addrspace(1) inreg %ptr) {
5019 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_3_3:
5021 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5022 ; GFX900-NEXT: ;;#ASMSTART
5023 ; GFX900-NEXT: ; def v[0:5]
5024 ; GFX900-NEXT: ;;#ASMEND
5025 ; GFX900-NEXT: ;;#ASMSTART
5026 ; GFX900-NEXT: ; def v[4:9]
5027 ; GFX900-NEXT: ;;#ASMEND
5028 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
5029 ; GFX900-NEXT: v_mov_b32_e32 v6, v4
5030 ; GFX900-NEXT: v_mov_b32_e32 v7, v5
5031 ; GFX900-NEXT: v_mov_b32_e32 v0, v8
5032 ; GFX900-NEXT: v_mov_b32_e32 v1, v9
5033 ; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
5034 ; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
5035 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5036 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5038 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_3_3:
5040 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5041 ; GFX90A-NEXT: ;;#ASMSTART
5042 ; GFX90A-NEXT: ; def v[0:5]
5043 ; GFX90A-NEXT: ;;#ASMEND
5044 ; GFX90A-NEXT: ;;#ASMSTART
5045 ; GFX90A-NEXT: ; def v[4:9]
5046 ; GFX90A-NEXT: ;;#ASMEND
5047 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
5048 ; GFX90A-NEXT: v_mov_b32_e32 v6, v4
5049 ; GFX90A-NEXT: v_mov_b32_e32 v7, v5
5050 ; GFX90A-NEXT: v_mov_b32_e32 v0, v8
5051 ; GFX90A-NEXT: v_mov_b32_e32 v1, v9
5052 ; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
5053 ; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
5054 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5055 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5057 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_3_3:
5059 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5060 ; GFX940-NEXT: ;;#ASMSTART
5061 ; GFX940-NEXT: ; def v[0:5]
5062 ; GFX940-NEXT: ;;#ASMEND
5063 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
5064 ; GFX940-NEXT: ;;#ASMSTART
5065 ; GFX940-NEXT: ; def v[4:9]
5066 ; GFX940-NEXT: ;;#ASMEND
5067 ; GFX940-NEXT: s_nop 0
5068 ; GFX940-NEXT: v_mov_b32_e32 v6, v4
5069 ; GFX940-NEXT: v_mov_b32_e32 v7, v5
5070 ; GFX940-NEXT: v_mov_b32_e32 v0, v8
5071 ; GFX940-NEXT: v_mov_b32_e32 v1, v9
5072 ; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1
5073 ; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
5074 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5075 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both operands come from inline asm with a "=v" output constraint.
5076 %vec0 = call <3 x i64> asm "; def $0", "=v"()
5077 %vec1 = call <3 x i64> asm "; def $0", "=v"()
5078 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 3, i32 3>
5079 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Test case for shufflevector of two <3 x i64> operands into <4 x i64> with
; mask <5, 2, 3, 3>. Mask values 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec1[2], %vec0[2], %vec1[0], %vec1[0] }.
; Both operands are selected and the expected output contains two asm defs.
; The result is stored to %ptr; the expected output emits the upper 16 bytes
; (offset 16) first and then the lower 16 bytes.
; The check lines are generated by utils/update_llc_test_checks.py (see the
; first line of this file), so regenerate them rather than editing by hand.
5083 define void @v_shuffle_v4i64_v3i64__5_2_3_3(ptr addrspace(1) inreg %ptr) {
5084 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_3_3:
5086 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5087 ; GFX900-NEXT: ;;#ASMSTART
5088 ; GFX900-NEXT: ; def v[0:5]
5089 ; GFX900-NEXT: ;;#ASMEND
5090 ; GFX900-NEXT: ;;#ASMSTART
5091 ; GFX900-NEXT: ; def v[6:11]
5092 ; GFX900-NEXT: ;;#ASMEND
5093 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
5094 ; GFX900-NEXT: v_mov_b32_e32 v8, v6
5095 ; GFX900-NEXT: v_mov_b32_e32 v9, v7
5096 ; GFX900-NEXT: v_mov_b32_e32 v2, v10
5097 ; GFX900-NEXT: v_mov_b32_e32 v3, v11
5098 ; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
5099 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
5100 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5101 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5103 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_3_3:
5105 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5106 ; GFX90A-NEXT: ;;#ASMSTART
5107 ; GFX90A-NEXT: ; def v[0:5]
5108 ; GFX90A-NEXT: ;;#ASMEND
5109 ; GFX90A-NEXT: ;;#ASMSTART
5110 ; GFX90A-NEXT: ; def v[6:11]
5111 ; GFX90A-NEXT: ;;#ASMEND
5112 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
5113 ; GFX90A-NEXT: v_mov_b32_e32 v8, v6
5114 ; GFX90A-NEXT: v_mov_b32_e32 v9, v7
5115 ; GFX90A-NEXT: v_mov_b32_e32 v2, v10
5116 ; GFX90A-NEXT: v_mov_b32_e32 v3, v11
5117 ; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
5118 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
5119 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5120 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5122 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_3_3:
5124 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5125 ; GFX940-NEXT: ;;#ASMSTART
5126 ; GFX940-NEXT: ; def v[0:5]
5127 ; GFX940-NEXT: ;;#ASMEND
5128 ; GFX940-NEXT: ;;#ASMSTART
5129 ; GFX940-NEXT: ; def v[6:11]
5130 ; GFX940-NEXT: ;;#ASMEND
5131 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
5132 ; GFX940-NEXT: v_mov_b32_e32 v8, v6
5133 ; GFX940-NEXT: v_mov_b32_e32 v9, v7
5134 ; GFX940-NEXT: v_mov_b32_e32 v2, v10
5135 ; GFX940-NEXT: v_mov_b32_e32 v3, v11
5136 ; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1
5137 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
5138 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5139 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both operands come from inline asm with a "=v" output constraint.
5140 %vec0 = call <3 x i64> asm "; def $0", "=v"()
5141 %vec1 = call <3 x i64> asm "; def $0", "=v"()
5142 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 3, i32 3>
5143 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Test case for shufflevector of two <3 x i64> operands into <4 x i64> with
; mask <5, 4, 3, 3>. Mask values 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec1[2], %vec1[1], %vec1[0], %vec1[0] }.
; %vec0 is never selected and the expected output contains a single asm def.
; The result is stored to %ptr; the expected output emits the upper 16 bytes
; (offset 16) first and then the lower 16 bytes.
; The check lines are generated by utils/update_llc_test_checks.py (see the
; first line of this file), so regenerate them rather than editing by hand.
5147 define void @v_shuffle_v4i64_v3i64__5_4_3_3(ptr addrspace(1) inreg %ptr) {
5148 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_3_3:
5150 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5151 ; GFX900-NEXT: ;;#ASMSTART
5152 ; GFX900-NEXT: ; def v[0:5]
5153 ; GFX900-NEXT: ;;#ASMEND
5154 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
5155 ; GFX900-NEXT: v_mov_b32_e32 v6, v0
5156 ; GFX900-NEXT: v_mov_b32_e32 v7, v1
5157 ; GFX900-NEXT: v_mov_b32_e32 v8, v0
5158 ; GFX900-NEXT: v_mov_b32_e32 v9, v1
5159 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
5160 ; GFX900-NEXT: v_mov_b32_e32 v1, v5
5161 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
5162 ; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
5163 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5164 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5166 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_3_3:
5168 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5169 ; GFX90A-NEXT: ;;#ASMSTART
5170 ; GFX90A-NEXT: ; def v[0:5]
5171 ; GFX90A-NEXT: ;;#ASMEND
5172 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
5173 ; GFX90A-NEXT: v_mov_b32_e32 v6, v0
5174 ; GFX90A-NEXT: v_mov_b32_e32 v7, v1
5175 ; GFX90A-NEXT: v_mov_b32_e32 v8, v0
5176 ; GFX90A-NEXT: v_mov_b32_e32 v9, v1
5177 ; GFX90A-NEXT: v_mov_b32_e32 v0, v4
5178 ; GFX90A-NEXT: v_mov_b32_e32 v1, v5
5179 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
5180 ; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
5181 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5182 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5184 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_3_3:
5186 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5187 ; GFX940-NEXT: ;;#ASMSTART
5188 ; GFX940-NEXT: ; def v[0:5]
5189 ; GFX940-NEXT: ;;#ASMEND
5190 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
5191 ; GFX940-NEXT: v_mov_b32_e32 v6, v0
5192 ; GFX940-NEXT: v_mov_b32_e32 v7, v1
5193 ; GFX940-NEXT: v_mov_b32_e32 v8, v0
5194 ; GFX940-NEXT: v_mov_b32_e32 v9, v1
5195 ; GFX940-NEXT: v_mov_b32_e32 v0, v4
5196 ; GFX940-NEXT: v_mov_b32_e32 v1, v5
5197 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
5198 ; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
5199 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5200 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both operands come from inline asm with a "=v" output constraint.
5201 %vec0 = call <3 x i64> asm "; def $0", "=v"()
5202 %vec1 = call <3 x i64> asm "; def $0", "=v"()
5203 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 3, i32 3>
5204 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Test case for shufflevector of two <3 x i64> operands into <4 x i64> with
; mask <5, 5, 3, 3>. Mask values 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec1[2], %vec1[2], %vec1[0], %vec1[0] }.
; %vec0 is never selected and the expected output contains a single asm def.
; The result is stored to %ptr; the expected output emits the upper 16 bytes
; (offset 16) first and then the lower 16 bytes.
; The check lines are generated by utils/update_llc_test_checks.py (see the
; first line of this file), so regenerate them rather than editing by hand.
5208 define void @v_shuffle_v4i64_v3i64__5_5_3_3(ptr addrspace(1) inreg %ptr) {
5209 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_3:
5211 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5212 ; GFX900-NEXT: ;;#ASMSTART
5213 ; GFX900-NEXT: ; def v[0:5]
5214 ; GFX900-NEXT: ;;#ASMEND
5215 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
5216 ; GFX900-NEXT: v_mov_b32_e32 v2, v0
5217 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
5218 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
5219 ; GFX900-NEXT: s_nop 0
5220 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
5221 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
5222 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
5223 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5224 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5226 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_3:
5228 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5229 ; GFX90A-NEXT: ;;#ASMSTART
5230 ; GFX90A-NEXT: ; def v[0:5]
5231 ; GFX90A-NEXT: ;;#ASMEND
5232 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
5233 ; GFX90A-NEXT: v_mov_b32_e32 v2, v0
5234 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
5235 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
5236 ; GFX90A-NEXT: s_nop 0
5237 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
5238 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
5239 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
5240 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5241 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5243 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_3:
5245 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5246 ; GFX940-NEXT: ;;#ASMSTART
5247 ; GFX940-NEXT: ; def v[0:5]
5248 ; GFX940-NEXT: ;;#ASMEND
5249 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
5250 ; GFX940-NEXT: v_mov_b32_e32 v2, v0
5251 ; GFX940-NEXT: v_mov_b32_e32 v3, v1
5252 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
5253 ; GFX940-NEXT: s_nop 1
5254 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
5255 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
5256 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
5257 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5258 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both operands come from inline asm with a "=v" output constraint.
5259 %vec0 = call <3 x i64> asm "; def $0", "=v"()
5260 %vec1 = call <3 x i64> asm "; def $0", "=v"()
5261 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 3>
5262 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Test case for shufflevector of two <3 x i64> operands into <4 x i64> with
; mask <5, 5, poison, 3>. Mask values 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec1[2], %vec1[2], unspecified, %vec1[0] }.
; %vec0 is never selected and the expected output contains a single asm def.
; The result is stored to %ptr; the expected output emits the upper 16 bytes
; (offset 16) first and then the lower 16 bytes.
; The check lines are generated by utils/update_llc_test_checks.py (see the
; first line of this file), so regenerate them rather than editing by hand.
5266 define void @v_shuffle_v4i64_v3i64__5_5_u_3(ptr addrspace(1) inreg %ptr) {
5267 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_3:
5269 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5270 ; GFX900-NEXT: ;;#ASMSTART
5271 ; GFX900-NEXT: ; def v[0:5]
5272 ; GFX900-NEXT: ;;#ASMEND
5273 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
5274 ; GFX900-NEXT: v_mov_b32_e32 v2, v0
5275 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
5276 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
5277 ; GFX900-NEXT: s_nop 0
5278 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
5279 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
5280 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
5281 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5282 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5284 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_3:
5286 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5287 ; GFX90A-NEXT: ;;#ASMSTART
5288 ; GFX90A-NEXT: ; def v[0:5]
5289 ; GFX90A-NEXT: ;;#ASMEND
5290 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
5291 ; GFX90A-NEXT: v_mov_b32_e32 v2, v0
5292 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
5293 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
5294 ; GFX90A-NEXT: s_nop 0
5295 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
5296 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
5297 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
5298 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5299 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5301 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_3:
5303 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5304 ; GFX940-NEXT: ;;#ASMSTART
5305 ; GFX940-NEXT: ; def v[0:5]
5306 ; GFX940-NEXT: ;;#ASMEND
5307 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
5308 ; GFX940-NEXT: v_mov_b32_e32 v2, v0
5309 ; GFX940-NEXT: v_mov_b32_e32 v3, v1
5310 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
5311 ; GFX940-NEXT: s_nop 1
5312 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
5313 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
5314 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
5315 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5316 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both operands come from inline asm with a "=v" output constraint.
5317 %vec0 = call <3 x i64> asm "; def $0", "=v"()
5318 %vec1 = call <3 x i64> asm "; def $0", "=v"()
5319 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 3>
5320 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Test case for shufflevector of two <3 x i64> operands into <4 x i64> with
; mask <5, 5, 0, 3>. Mask values 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec1[2], %vec1[2], %vec0[0], %vec1[0] }.
; Both operands are selected and the expected output contains two asm defs.
; The result is stored to %ptr; the expected output emits the upper 16 bytes
; (offset 16) first and then the lower 16 bytes.
; The check lines are generated by utils/update_llc_test_checks.py (see the
; first line of this file), so regenerate them rather than editing by hand.
5324 define void @v_shuffle_v4i64_v3i64__5_5_0_3(ptr addrspace(1) inreg %ptr) {
5325 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_3:
5327 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5328 ; GFX900-NEXT: ;;#ASMSTART
5329 ; GFX900-NEXT: ; def v[0:5]
5330 ; GFX900-NEXT: ;;#ASMEND
5331 ; GFX900-NEXT: ;;#ASMSTART
5332 ; GFX900-NEXT: ; def v[2:7]
5333 ; GFX900-NEXT: ;;#ASMEND
5334 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
5335 ; GFX900-NEXT: v_mov_b32_e32 v4, v6
5336 ; GFX900-NEXT: v_mov_b32_e32 v5, v7
5337 ; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
5338 ; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
5339 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5340 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5342 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_3:
5344 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5345 ; GFX90A-NEXT: ;;#ASMSTART
5346 ; GFX90A-NEXT: ; def v[0:5]
5347 ; GFX90A-NEXT: ;;#ASMEND
5348 ; GFX90A-NEXT: ;;#ASMSTART
5349 ; GFX90A-NEXT: ; def v[2:7]
5350 ; GFX90A-NEXT: ;;#ASMEND
5351 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
5352 ; GFX90A-NEXT: v_mov_b32_e32 v4, v6
5353 ; GFX90A-NEXT: v_mov_b32_e32 v5, v7
5354 ; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
5355 ; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
5356 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5357 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5359 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_3:
5361 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5362 ; GFX940-NEXT: ;;#ASMSTART
5363 ; GFX940-NEXT: ; def v[0:5]
5364 ; GFX940-NEXT: ;;#ASMEND
5365 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
5366 ; GFX940-NEXT: ;;#ASMSTART
5367 ; GFX940-NEXT: ; def v[2:7]
5368 ; GFX940-NEXT: ;;#ASMEND
5369 ; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
5370 ; GFX940-NEXT: v_mov_b32_e32 v4, v6
5371 ; GFX940-NEXT: v_mov_b32_e32 v5, v7
5372 ; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
5373 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5374 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both operands come from inline asm with a "=v" output constraint.
5375 %vec0 = call <3 x i64> asm "; def $0", "=v"()
5376 %vec1 = call <3 x i64> asm "; def $0", "=v"()
5377 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 3>
5378 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Test case for shufflevector of two <3 x i64> operands into <4 x i64> with
; mask <5, 5, 1, 3>. Mask values 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec1[2], %vec1[2], %vec0[1], %vec1[0] }.
; Both operands are selected and the expected output contains two asm defs.
; The result is stored to %ptr; the expected output emits the upper 16 bytes
; (offset 16) first and then the lower 16 bytes.
; The check lines are generated by utils/update_llc_test_checks.py (see the
; first line of this file), so regenerate them rather than editing by hand.
5382 define void @v_shuffle_v4i64_v3i64__5_5_1_3(ptr addrspace(1) inreg %ptr) {
5383 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_3:
5385 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5386 ; GFX900-NEXT: ;;#ASMSTART
5387 ; GFX900-NEXT: ; def v[0:5]
5388 ; GFX900-NEXT: ;;#ASMEND
5389 ; GFX900-NEXT: ;;#ASMSTART
5390 ; GFX900-NEXT: ; def v[4:9]
5391 ; GFX900-NEXT: ;;#ASMEND
5392 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
5393 ; GFX900-NEXT: v_mov_b32_e32 v6, v8
5394 ; GFX900-NEXT: v_mov_b32_e32 v7, v9
5395 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
5396 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
5397 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5398 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5400 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_3:
5402 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5403 ; GFX90A-NEXT: ;;#ASMSTART
5404 ; GFX90A-NEXT: ; def v[0:5]
5405 ; GFX90A-NEXT: ;;#ASMEND
5406 ; GFX90A-NEXT: ;;#ASMSTART
5407 ; GFX90A-NEXT: ; def v[4:9]
5408 ; GFX90A-NEXT: ;;#ASMEND
5409 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
5410 ; GFX90A-NEXT: v_mov_b32_e32 v6, v8
5411 ; GFX90A-NEXT: v_mov_b32_e32 v7, v9
5412 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
5413 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
5414 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5415 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5417 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_3:
5419 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5420 ; GFX940-NEXT: ;;#ASMSTART
5421 ; GFX940-NEXT: ; def v[0:5]
5422 ; GFX940-NEXT: ;;#ASMEND
5423 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
5424 ; GFX940-NEXT: ;;#ASMSTART
5425 ; GFX940-NEXT: ; def v[4:9]
5426 ; GFX940-NEXT: ;;#ASMEND
5427 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
5428 ; GFX940-NEXT: v_mov_b32_e32 v6, v8
5429 ; GFX940-NEXT: v_mov_b32_e32 v7, v9
5430 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
5431 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5432 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both operands come from inline asm with a "=v" output constraint.
5433 %vec0 = call <3 x i64> asm "; def $0", "=v"()
5434 %vec1 = call <3 x i64> asm "; def $0", "=v"()
5435 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 3>
5436 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Test case for shufflevector of two <3 x i64> operands into <4 x i64> with
; mask <5, 5, 2, 3>. Mask values 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec1[2], %vec1[2], %vec0[2], %vec1[0] }.
; Both operands are selected and the expected output contains two asm defs.
; The result is stored to %ptr; the expected output emits the upper 16 bytes
; (offset 16) first and then the lower 16 bytes.
; The check lines are generated by utils/update_llc_test_checks.py (see the
; first line of this file), so regenerate them rather than editing by hand.
5440 define void @v_shuffle_v4i64_v3i64__5_5_2_3(ptr addrspace(1) inreg %ptr) {
5441 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_3:
5443 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5444 ; GFX900-NEXT: ;;#ASMSTART
5445 ; GFX900-NEXT: ; def v[0:5]
5446 ; GFX900-NEXT: ;;#ASMEND
5447 ; GFX900-NEXT: ;;#ASMSTART
5448 ; GFX900-NEXT: ; def v[6:11]
5449 ; GFX900-NEXT: ;;#ASMEND
5450 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
5451 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
5452 ; GFX900-NEXT: v_mov_b32_e32 v1, v5
5453 ; GFX900-NEXT: v_mov_b32_e32 v2, v6
5454 ; GFX900-NEXT: v_mov_b32_e32 v3, v7
5455 ; GFX900-NEXT: v_mov_b32_e32 v8, v10
5456 ; GFX900-NEXT: v_mov_b32_e32 v9, v11
5457 ; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
5458 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
5459 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5460 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5462 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_3:
5464 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5465 ; GFX90A-NEXT: ;;#ASMSTART
5466 ; GFX90A-NEXT: ; def v[0:5]
5467 ; GFX90A-NEXT: ;;#ASMEND
5468 ; GFX90A-NEXT: ;;#ASMSTART
5469 ; GFX90A-NEXT: ; def v[6:11]
5470 ; GFX90A-NEXT: ;;#ASMEND
5471 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
5472 ; GFX90A-NEXT: v_mov_b32_e32 v0, v4
5473 ; GFX90A-NEXT: v_mov_b32_e32 v1, v5
5474 ; GFX90A-NEXT: v_mov_b32_e32 v2, v6
5475 ; GFX90A-NEXT: v_mov_b32_e32 v3, v7
5476 ; GFX90A-NEXT: v_mov_b32_e32 v8, v10
5477 ; GFX90A-NEXT: v_mov_b32_e32 v9, v11
5478 ; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
5479 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
5480 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5481 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5483 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_3:
5485 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5486 ; GFX940-NEXT: ;;#ASMSTART
5487 ; GFX940-NEXT: ; def v[0:5]
5488 ; GFX940-NEXT: ;;#ASMEND
5489 ; GFX940-NEXT: ;;#ASMSTART
5490 ; GFX940-NEXT: ; def v[6:11]
5491 ; GFX940-NEXT: ;;#ASMEND
5492 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
5493 ; GFX940-NEXT: v_mov_b32_e32 v0, v4
5494 ; GFX940-NEXT: v_mov_b32_e32 v1, v5
5495 ; GFX940-NEXT: v_mov_b32_e32 v2, v6
5496 ; GFX940-NEXT: v_mov_b32_e32 v3, v7
5497 ; GFX940-NEXT: v_mov_b32_e32 v8, v10
5498 ; GFX940-NEXT: v_mov_b32_e32 v9, v11
5499 ; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1
5500 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
5501 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5502 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both operands come from inline asm with a "=v" output constraint.
5503 %vec0 = call <3 x i64> asm "; def $0", "=v"()
5504 %vec1 = call <3 x i64> asm "; def $0", "=v"()
5505 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
5506 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Test case for shufflevector of two <3 x i64> operands into <4 x i64> with
; mask <5, 5, 4, 3>. Mask values 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec1[2], %vec1[2], %vec1[1], %vec1[0] }.
; %vec0 is never selected and the expected output contains a single asm def.
; The result is stored to %ptr; the expected output emits the upper 16 bytes
; (offset 16) first and then the lower 16 bytes.
; The check lines are generated by utils/update_llc_test_checks.py (see the
; first line of this file), so regenerate them rather than editing by hand.
5510 define void @v_shuffle_v4i64_v3i64__5_5_4_3(ptr addrspace(1) inreg %ptr) {
5511 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_3:
5513 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5514 ; GFX900-NEXT: ;;#ASMSTART
5515 ; GFX900-NEXT: ; def v[0:5]
5516 ; GFX900-NEXT: ;;#ASMEND
5517 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
5518 ; GFX900-NEXT: v_mov_b32_e32 v6, v2
5519 ; GFX900-NEXT: v_mov_b32_e32 v7, v3
5520 ; GFX900-NEXT: v_mov_b32_e32 v8, v0
5521 ; GFX900-NEXT: v_mov_b32_e32 v9, v1
5522 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
5523 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
5524 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
5525 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
5526 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5527 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5529 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_3:
5531 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5532 ; GFX90A-NEXT: ;;#ASMSTART
5533 ; GFX90A-NEXT: ; def v[0:5]
5534 ; GFX90A-NEXT: ;;#ASMEND
5535 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
5536 ; GFX90A-NEXT: v_mov_b32_e32 v6, v2
5537 ; GFX90A-NEXT: v_mov_b32_e32 v7, v3
5538 ; GFX90A-NEXT: v_mov_b32_e32 v8, v0
5539 ; GFX90A-NEXT: v_mov_b32_e32 v9, v1
5540 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
5541 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
5542 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
5543 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
5544 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5545 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5547 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_3:
5549 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5550 ; GFX940-NEXT: ;;#ASMSTART
5551 ; GFX940-NEXT: ; def v[0:5]
5552 ; GFX940-NEXT: ;;#ASMEND
5553 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
5554 ; GFX940-NEXT: v_mov_b32_e32 v6, v2
5555 ; GFX940-NEXT: v_mov_b32_e32 v7, v3
5556 ; GFX940-NEXT: v_mov_b32_e32 v8, v0
5557 ; GFX940-NEXT: v_mov_b32_e32 v9, v1
5558 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
5559 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
5560 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
5561 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1
5562 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5563 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both operands come from inline asm with a "=v" output constraint.
5564 %vec0 = call <3 x i64> asm "; def $0", "=v"()
5565 %vec1 = call <3 x i64> asm "; def $0", "=v"()
5566 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 3>
5567 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Test case for shufflevector of two <3 x i64> operands into <4 x i64> with
; mask <poison, 4, 4, 4>. Mask values 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { unspecified, %vec1[1], %vec1[1], %vec1[1] }.
; %vec0 is never selected and the expected output contains a single asm def.
; The result is stored to %ptr; the expected output emits the upper 16 bytes
; (offset 16) first and then the lower 16 bytes.
; The check lines are generated by utils/update_llc_test_checks.py (see the
; first line of this file), so regenerate them rather than editing by hand.
5571 define void @v_shuffle_v4i64_v3i64__u_4_4_4(ptr addrspace(1) inreg %ptr) {
5572 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__u_4_4_4:
5574 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5575 ; GFX900-NEXT: ;;#ASMSTART
5576 ; GFX900-NEXT: ; def v[0:5]
5577 ; GFX900-NEXT: ;;#ASMEND
5578 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
5579 ; GFX900-NEXT: v_mov_b32_e32 v4, v2
5580 ; GFX900-NEXT: v_mov_b32_e32 v5, v3
5581 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
5582 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
5583 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5584 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5586 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__u_4_4_4:
5588 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5589 ; GFX90A-NEXT: ;;#ASMSTART
5590 ; GFX90A-NEXT: ; def v[0:5]
5591 ; GFX90A-NEXT: ;;#ASMEND
5592 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
5593 ; GFX90A-NEXT: v_mov_b32_e32 v4, v2
5594 ; GFX90A-NEXT: v_mov_b32_e32 v5, v3
5595 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
5596 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
5597 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5598 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5600 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_4_4_4:
5602 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5603 ; GFX940-NEXT: ;;#ASMSTART
5604 ; GFX940-NEXT: ; def v[0:5]
5605 ; GFX940-NEXT: ;;#ASMEND
5606 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
5607 ; GFX940-NEXT: v_mov_b32_e32 v4, v2
5608 ; GFX940-NEXT: v_mov_b32_e32 v5, v3
5609 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
5610 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
5611 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5612 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both operands come from inline asm with a "=v" output constraint.
5613 %vec0 = call <3 x i64> asm "; def $0", "=v"()
5614 %vec1 = call <3 x i64> asm "; def $0", "=v"()
5615 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
5616 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Test case for shufflevector of two <3 x i64> operands into <4 x i64> with
; mask <0, 4, 4, 4>. Mask values 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec0[0], %vec1[1], %vec1[1], %vec1[1] }.
; Both operands are selected and the expected output contains two asm defs.
; The result is stored to %ptr; the expected output emits the upper 16 bytes
; (offset 16) first and then the lower 16 bytes.
; The check lines are generated by utils/update_llc_test_checks.py (see the
; first line of this file), so regenerate them rather than editing by hand.
5620 define void @v_shuffle_v4i64_v3i64__0_4_4_4(ptr addrspace(1) inreg %ptr) {
5621 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_4_4_4:
5623 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5624 ; GFX900-NEXT: ;;#ASMSTART
5625 ; GFX900-NEXT: ; def v[0:5]
5626 ; GFX900-NEXT: ;;#ASMEND
5627 ; GFX900-NEXT: ;;#ASMSTART
5628 ; GFX900-NEXT: ; def v[2:7]
5629 ; GFX900-NEXT: ;;#ASMEND
5630 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
5631 ; GFX900-NEXT: v_mov_b32_e32 v6, v4
5632 ; GFX900-NEXT: v_mov_b32_e32 v7, v5
5633 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
5634 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
5635 ; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
5636 ; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
5637 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5638 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5640 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_4_4_4:
5642 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5643 ; GFX90A-NEXT: ;;#ASMSTART
5644 ; GFX90A-NEXT: ; def v[0:5]
5645 ; GFX90A-NEXT: ;;#ASMEND
5646 ; GFX90A-NEXT: ;;#ASMSTART
5647 ; GFX90A-NEXT: ; def v[2:7]
5648 ; GFX90A-NEXT: ;;#ASMEND
5649 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
5650 ; GFX90A-NEXT: v_mov_b32_e32 v6, v4
5651 ; GFX90A-NEXT: v_mov_b32_e32 v7, v5
5652 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
5653 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
5654 ; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
5655 ; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
5656 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5657 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5659 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_4_4_4:
5661 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5662 ; GFX940-NEXT: ;;#ASMSTART
5663 ; GFX940-NEXT: ; def v[0:5]
5664 ; GFX940-NEXT: ;;#ASMEND
5665 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
5666 ; GFX940-NEXT: ;;#ASMSTART
5667 ; GFX940-NEXT: ; def v[2:7]
5668 ; GFX940-NEXT: ;;#ASMEND
5669 ; GFX940-NEXT: s_nop 0
5670 ; GFX940-NEXT: v_mov_b32_e32 v6, v4
5671 ; GFX940-NEXT: v_mov_b32_e32 v7, v5
5672 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
5673 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
5674 ; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1
5675 ; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1
5676 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5677 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both operands come from inline asm with a "=v" output constraint.
5678 %vec0 = call <3 x i64> asm "; def $0", "=v"()
5679 %vec1 = call <3 x i64> asm "; def $0", "=v"()
5680 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
5681 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <1, 4, 4, 4> over two <3 x i64> inputs: lane 0 reads element 1
; of %vec0, and lanes 1-3 all read element 1 of %vec1 (mask index 4 = 3 + 1).
; Both inputs are produced by opaque inline asm with a "=v" output, and the
; <4 x i64> result is stored through the inreg global pointer %ptr.
5685 define void @v_shuffle_v4i64_v3i64__1_4_4_4(ptr addrspace(1) inreg %ptr) {
5686 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_4_4_4:
5688 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5689 ; GFX900-NEXT: ;;#ASMSTART
5690 ; GFX900-NEXT: ; def v[0:5]
5691 ; GFX900-NEXT: ;;#ASMEND
5692 ; GFX900-NEXT: ;;#ASMSTART
5693 ; GFX900-NEXT: ; def v[4:9]
5694 ; GFX900-NEXT: ;;#ASMEND
5695 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
5696 ; GFX900-NEXT: v_mov_b32_e32 v8, v6
5697 ; GFX900-NEXT: v_mov_b32_e32 v9, v7
5698 ; GFX900-NEXT: v_mov_b32_e32 v4, v6
5699 ; GFX900-NEXT: v_mov_b32_e32 v5, v7
5700 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
5701 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
5702 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5703 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5705 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_4_4_4:
5707 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5708 ; GFX90A-NEXT: ;;#ASMSTART
5709 ; GFX90A-NEXT: ; def v[0:5]
5710 ; GFX90A-NEXT: ;;#ASMEND
5711 ; GFX90A-NEXT: ;;#ASMSTART
5712 ; GFX90A-NEXT: ; def v[4:9]
5713 ; GFX90A-NEXT: ;;#ASMEND
5714 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
5715 ; GFX90A-NEXT: v_mov_b32_e32 v8, v6
5716 ; GFX90A-NEXT: v_mov_b32_e32 v9, v7
5717 ; GFX90A-NEXT: v_mov_b32_e32 v4, v6
5718 ; GFX90A-NEXT: v_mov_b32_e32 v5, v7
5719 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
5720 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
5721 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5722 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5724 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_4_4_4:
5726 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5727 ; GFX940-NEXT: ;;#ASMSTART
5728 ; GFX940-NEXT: ; def v[0:5]
5729 ; GFX940-NEXT: ;;#ASMEND
5730 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
5731 ; GFX940-NEXT: ;;#ASMSTART
5732 ; GFX940-NEXT: ; def v[4:9]
5733 ; GFX940-NEXT: ;;#ASMEND
5734 ; GFX940-NEXT: s_nop 0
5735 ; GFX940-NEXT: v_mov_b32_e32 v8, v6
5736 ; GFX940-NEXT: v_mov_b32_e32 v9, v7
5737 ; GFX940-NEXT: v_mov_b32_e32 v4, v6
5738 ; GFX940-NEXT: v_mov_b32_e32 v5, v7
5739 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
5740 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1
5741 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5742 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5743 %vec0 = call <3 x i64> asm "; def $0", "=v"()
5744 %vec1 = call <3 x i64> asm "; def $0", "=v"()
5745 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
5746 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <2, 4, 4, 4> over two <3 x i64> inputs: lane 0 reads element 2
; (the last element) of %vec0, and lanes 1-3 all read element 1 of %vec1.
; Both inputs are produced by opaque inline asm with a "=v" output, and the
; <4 x i64> result is stored through the inreg global pointer %ptr.
5750 define void @v_shuffle_v4i64_v3i64__2_4_4_4(ptr addrspace(1) inreg %ptr) {
5751 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_4_4_4:
5753 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5754 ; GFX900-NEXT: ;;#ASMSTART
5755 ; GFX900-NEXT: ; def v[6:11]
5756 ; GFX900-NEXT: ;;#ASMEND
5757 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
5758 ; GFX900-NEXT: ;;#ASMSTART
5759 ; GFX900-NEXT: ; def v[0:5]
5760 ; GFX900-NEXT: ;;#ASMEND
5761 ; GFX900-NEXT: v_mov_b32_e32 v10, v8
5762 ; GFX900-NEXT: v_mov_b32_e32 v11, v9
5763 ; GFX900-NEXT: v_mov_b32_e32 v6, v4
5764 ; GFX900-NEXT: v_mov_b32_e32 v7, v5
5765 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
5766 ; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
5767 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5768 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5770 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_4_4_4:
5772 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5773 ; GFX90A-NEXT: ;;#ASMSTART
5774 ; GFX90A-NEXT: ; def v[6:11]
5775 ; GFX90A-NEXT: ;;#ASMEND
5776 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
5777 ; GFX90A-NEXT: ;;#ASMSTART
5778 ; GFX90A-NEXT: ; def v[0:5]
5779 ; GFX90A-NEXT: ;;#ASMEND
5780 ; GFX90A-NEXT: v_mov_b32_e32 v10, v8
5781 ; GFX90A-NEXT: v_mov_b32_e32 v11, v9
5782 ; GFX90A-NEXT: v_mov_b32_e32 v6, v4
5783 ; GFX90A-NEXT: v_mov_b32_e32 v7, v5
5784 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
5785 ; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
5786 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5787 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5789 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_4_4_4:
5791 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5792 ; GFX940-NEXT: ;;#ASMSTART
5793 ; GFX940-NEXT: ; def v[6:11]
5794 ; GFX940-NEXT: ;;#ASMEND
5795 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
5796 ; GFX940-NEXT: ;;#ASMSTART
5797 ; GFX940-NEXT: ; def v[0:5]
5798 ; GFX940-NEXT: ;;#ASMEND
5799 ; GFX940-NEXT: v_mov_b32_e32 v10, v8
5800 ; GFX940-NEXT: v_mov_b32_e32 v11, v9
5801 ; GFX940-NEXT: v_mov_b32_e32 v6, v4
5802 ; GFX940-NEXT: v_mov_b32_e32 v7, v5
5803 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1
5804 ; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1
5805 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5806 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5807 %vec0 = call <3 x i64> asm "; def $0", "=v"()
5808 %vec1 = call <3 x i64> asm "; def $0", "=v"()
5809 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
5810 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <3, 4, 4, 4> over two <3 x i64> inputs: lane 0 reads element 0
; of %vec1 (mask index 3 = 3 + 0), and lanes 1-3 all read element 1 of %vec1.
; No lane selects from %vec0, and the expected output below contains a single
; inline-asm def. The <4 x i64> result is stored through the inreg global
; pointer %ptr.
5814 define void @v_shuffle_v4i64_v3i64__3_4_4_4(ptr addrspace(1) inreg %ptr) {
5815 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__3_4_4_4:
5817 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5818 ; GFX900-NEXT: ;;#ASMSTART
5819 ; GFX900-NEXT: ; def v[0:5]
5820 ; GFX900-NEXT: ;;#ASMEND
5821 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
5822 ; GFX900-NEXT: v_mov_b32_e32 v4, v2
5823 ; GFX900-NEXT: v_mov_b32_e32 v5, v3
5824 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
5825 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
5826 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5827 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5829 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__3_4_4_4:
5831 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5832 ; GFX90A-NEXT: ;;#ASMSTART
5833 ; GFX90A-NEXT: ; def v[0:5]
5834 ; GFX90A-NEXT: ;;#ASMEND
5835 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
5836 ; GFX90A-NEXT: v_mov_b32_e32 v4, v2
5837 ; GFX90A-NEXT: v_mov_b32_e32 v5, v3
5838 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
5839 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
5840 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5841 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5843 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_4_4_4:
5845 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5846 ; GFX940-NEXT: ;;#ASMSTART
5847 ; GFX940-NEXT: ; def v[0:5]
5848 ; GFX940-NEXT: ;;#ASMEND
5849 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
5850 ; GFX940-NEXT: v_mov_b32_e32 v4, v2
5851 ; GFX940-NEXT: v_mov_b32_e32 v5, v3
5852 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
5853 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
5854 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5855 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5856 %vec0 = call <3 x i64> asm "; def $0", "=v"()
5857 %vec1 = call <3 x i64> asm "; def $0", "=v"()
5858 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
5859 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <4, 4, 4, 4> over two <3 x i64> inputs: every result lane reads
; element 1 of %vec1 (mask index 4 = 3 + 1), i.e. a splat of that element.
; No lane selects from %vec0, and the expected output below contains a single
; inline-asm def. The <4 x i64> result is stored through the inreg global
; pointer %ptr.
5863 define void @v_shuffle_v4i64_v3i64__4_4_4_4(ptr addrspace(1) inreg %ptr) {
5864 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_4_4_4:
5866 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5867 ; GFX900-NEXT: ;;#ASMSTART
5868 ; GFX900-NEXT: ; def v[0:5]
5869 ; GFX900-NEXT: ;;#ASMEND
5870 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
5871 ; GFX900-NEXT: v_mov_b32_e32 v4, v2
5872 ; GFX900-NEXT: v_mov_b32_e32 v5, v3
5873 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
5874 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
5875 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5876 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5878 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_4_4_4:
5880 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5881 ; GFX90A-NEXT: ;;#ASMSTART
5882 ; GFX90A-NEXT: ; def v[0:5]
5883 ; GFX90A-NEXT: ;;#ASMEND
5884 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
5885 ; GFX90A-NEXT: v_mov_b32_e32 v4, v2
5886 ; GFX90A-NEXT: v_mov_b32_e32 v5, v3
5887 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
5888 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
5889 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5890 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5892 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_4_4_4:
5894 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5895 ; GFX940-NEXT: ;;#ASMSTART
5896 ; GFX940-NEXT: ; def v[0:5]
5897 ; GFX940-NEXT: ;;#ASMEND
5898 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
5899 ; GFX940-NEXT: v_mov_b32_e32 v4, v2
5900 ; GFX940-NEXT: v_mov_b32_e32 v5, v3
5901 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
5902 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
5903 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5904 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5905 %vec0 = call <3 x i64> asm "; def $0", "=v"()
5906 %vec1 = call <3 x i64> asm "; def $0", "=v"()
5907 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
5908 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 4, 4, 4> over two <3 x i64> inputs: lane 0 reads element 2
; of %vec1 (mask index 5 = 3 + 2), and lanes 1-3 all read element 1 of %vec1.
; No lane selects from %vec0, and the expected output below contains a single
; inline-asm def. The <4 x i64> result is stored through the inreg global
; pointer %ptr.
5912 define void @v_shuffle_v4i64_v3i64__5_4_4_4(ptr addrspace(1) inreg %ptr) {
5913 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_4_4:
5915 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5916 ; GFX900-NEXT: ;;#ASMSTART
5917 ; GFX900-NEXT: ; def v[0:5]
5918 ; GFX900-NEXT: ;;#ASMEND
5919 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
5920 ; GFX900-NEXT: v_mov_b32_e32 v0, v2
5921 ; GFX900-NEXT: v_mov_b32_e32 v1, v3
5922 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
5923 ; GFX900-NEXT: s_nop 0
5924 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
5925 ; GFX900-NEXT: v_mov_b32_e32 v1, v5
5926 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
5927 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5928 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5930 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_4_4:
5932 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5933 ; GFX90A-NEXT: ;;#ASMSTART
5934 ; GFX90A-NEXT: ; def v[0:5]
5935 ; GFX90A-NEXT: ;;#ASMEND
5936 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
5937 ; GFX90A-NEXT: v_mov_b32_e32 v0, v2
5938 ; GFX90A-NEXT: v_mov_b32_e32 v1, v3
5939 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
5940 ; GFX90A-NEXT: s_nop 0
5941 ; GFX90A-NEXT: v_mov_b32_e32 v0, v4
5942 ; GFX90A-NEXT: v_mov_b32_e32 v1, v5
5943 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
5944 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5945 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5947 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_4_4:
5949 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5950 ; GFX940-NEXT: ;;#ASMSTART
5951 ; GFX940-NEXT: ; def v[0:5]
5952 ; GFX940-NEXT: ;;#ASMEND
5953 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
5954 ; GFX940-NEXT: v_mov_b32_e32 v0, v2
5955 ; GFX940-NEXT: v_mov_b32_e32 v1, v3
5956 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
5957 ; GFX940-NEXT: s_nop 1
5958 ; GFX940-NEXT: v_mov_b32_e32 v0, v4
5959 ; GFX940-NEXT: v_mov_b32_e32 v1, v5
5960 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
5961 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5962 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5963 %vec0 = call <3 x i64> asm "; def $0", "=v"()
5964 %vec1 = call <3 x i64> asm "; def $0", "=v"()
5965 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
5966 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, poison, 4, 4> over two <3 x i64> inputs: lane 0 reads
; element 2 of %vec1, lane 1 is left unspecified (poison mask entry), and
; lanes 2-3 read element 1 of %vec1. No lane selects from %vec0, and the
; expected output below contains a single inline-asm def. The <4 x i64>
; result is stored through the inreg global pointer %ptr.
5970 define void @v_shuffle_v4i64_v3i64__5_u_4_4(ptr addrspace(1) inreg %ptr) {
5971 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_4_4:
5973 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5974 ; GFX900-NEXT: ;;#ASMSTART
5975 ; GFX900-NEXT: ; def v[0:5]
5976 ; GFX900-NEXT: ;;#ASMEND
5977 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
5978 ; GFX900-NEXT: v_mov_b32_e32 v0, v2
5979 ; GFX900-NEXT: v_mov_b32_e32 v1, v3
5980 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
5981 ; GFX900-NEXT: s_nop 0
5982 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
5983 ; GFX900-NEXT: v_mov_b32_e32 v1, v5
5984 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
5985 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5986 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5988 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_4_4:
5990 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5991 ; GFX90A-NEXT: ;;#ASMSTART
5992 ; GFX90A-NEXT: ; def v[0:5]
5993 ; GFX90A-NEXT: ;;#ASMEND
5994 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
5995 ; GFX90A-NEXT: v_mov_b32_e32 v0, v2
5996 ; GFX90A-NEXT: v_mov_b32_e32 v1, v3
5997 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
5998 ; GFX90A-NEXT: s_nop 0
5999 ; GFX90A-NEXT: v_mov_b32_e32 v0, v4
6000 ; GFX90A-NEXT: v_mov_b32_e32 v1, v5
6001 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
6002 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6003 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6005 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_4_4:
6007 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6008 ; GFX940-NEXT: ;;#ASMSTART
6009 ; GFX940-NEXT: ; def v[0:5]
6010 ; GFX940-NEXT: ;;#ASMEND
6011 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
6012 ; GFX940-NEXT: v_mov_b32_e32 v0, v2
6013 ; GFX940-NEXT: v_mov_b32_e32 v1, v3
6014 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
6015 ; GFX940-NEXT: s_nop 1
6016 ; GFX940-NEXT: v_mov_b32_e32 v0, v4
6017 ; GFX940-NEXT: v_mov_b32_e32 v1, v5
6018 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
6019 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6020 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6021 %vec0 = call <3 x i64> asm "; def $0", "=v"()
6022 %vec1 = call <3 x i64> asm "; def $0", "=v"()
6023 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 4, i32 4>
6024 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 0, 4, 4> over two <3 x i64> inputs: lane 0 reads element 2
; of %vec1 (mask index 5 = 3 + 2), lane 1 reads element 0 of %vec0, and lanes
; 2-3 read element 1 of %vec1. Both inputs are produced by opaque inline asm
; with a "=v" output, and the <4 x i64> result is stored through the inreg
; global pointer %ptr.
6028 define void @v_shuffle_v4i64_v3i64__5_0_4_4(ptr addrspace(1) inreg %ptr) {
6029 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_4_4:
6031 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6032 ; GFX900-NEXT: ;;#ASMSTART
6033 ; GFX900-NEXT: ; def v[0:5]
6034 ; GFX900-NEXT: ;;#ASMEND
6035 ; GFX900-NEXT: ;;#ASMSTART
6036 ; GFX900-NEXT: ; def v[2:7]
6037 ; GFX900-NEXT: ;;#ASMEND
6038 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
6039 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
6040 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
6041 ; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
6042 ; GFX900-NEXT: s_nop 0
6043 ; GFX900-NEXT: v_mov_b32_e32 v2, v6
6044 ; GFX900-NEXT: v_mov_b32_e32 v3, v7
6045 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
6046 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
6047 ; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
6048 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6049 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6051 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_4_4:
6053 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6054 ; GFX90A-NEXT: ;;#ASMSTART
6055 ; GFX90A-NEXT: ; def v[0:5]
6056 ; GFX90A-NEXT: ;;#ASMEND
6057 ; GFX90A-NEXT: ;;#ASMSTART
6058 ; GFX90A-NEXT: ; def v[2:7]
6059 ; GFX90A-NEXT: ;;#ASMEND
6060 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
6061 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
6062 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
6063 ; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
6064 ; GFX90A-NEXT: s_nop 0
6065 ; GFX90A-NEXT: v_mov_b32_e32 v2, v6
6066 ; GFX90A-NEXT: v_mov_b32_e32 v3, v7
6067 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
6068 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
6069 ; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
6070 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6071 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6073 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_4_4:
6075 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6076 ; GFX940-NEXT: ;;#ASMSTART
6077 ; GFX940-NEXT: ; def v[0:5]
6078 ; GFX940-NEXT: ;;#ASMEND
6079 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
6080 ; GFX940-NEXT: ;;#ASMSTART
6081 ; GFX940-NEXT: ; def v[2:7]
6082 ; GFX940-NEXT: ;;#ASMEND
6083 ; GFX940-NEXT: s_nop 0
6084 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
6085 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
6086 ; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1
6087 ; GFX940-NEXT: s_nop 1
6088 ; GFX940-NEXT: v_mov_b32_e32 v2, v6
6089 ; GFX940-NEXT: v_mov_b32_e32 v3, v7
6090 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
6091 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
6092 ; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
6093 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6094 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6095 %vec0 = call <3 x i64> asm "; def $0", "=v"()
6096 %vec1 = call <3 x i64> asm "; def $0", "=v"()
6097 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 4, i32 4>
6098 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 1, 4, 4> over two <3 x i64> inputs: lane 0 reads element 2
; of %vec1 (mask index 5 = 3 + 2), lane 1 reads element 1 of %vec0, and lanes
; 2-3 read element 1 of %vec1. Both inputs are produced by opaque inline asm
; with a "=v" output, and the <4 x i64> result is stored through the inreg
; global pointer %ptr.
6102 define void @v_shuffle_v4i64_v3i64__5_1_4_4(ptr addrspace(1) inreg %ptr) {
6103 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_4_4:
6105 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6106 ; GFX900-NEXT: ;;#ASMSTART
6107 ; GFX900-NEXT: ; def v[0:5]
6108 ; GFX900-NEXT: ;;#ASMEND
6109 ; GFX900-NEXT: ;;#ASMSTART
6110 ; GFX900-NEXT: ; def v[4:9]
6111 ; GFX900-NEXT: ;;#ASMEND
6112 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
6113 ; GFX900-NEXT: v_mov_b32_e32 v4, v6
6114 ; GFX900-NEXT: v_mov_b32_e32 v5, v7
6115 ; GFX900-NEXT: v_mov_b32_e32 v0, v8
6116 ; GFX900-NEXT: v_mov_b32_e32 v1, v9
6117 ; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
6118 ; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
6119 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6120 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6122 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_4_4:
6124 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6125 ; GFX90A-NEXT: ;;#ASMSTART
6126 ; GFX90A-NEXT: ; def v[0:5]
6127 ; GFX90A-NEXT: ;;#ASMEND
6128 ; GFX90A-NEXT: ;;#ASMSTART
6129 ; GFX90A-NEXT: ; def v[4:9]
6130 ; GFX90A-NEXT: ;;#ASMEND
6131 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
6132 ; GFX90A-NEXT: v_mov_b32_e32 v4, v6
6133 ; GFX90A-NEXT: v_mov_b32_e32 v5, v7
6134 ; GFX90A-NEXT: v_mov_b32_e32 v0, v8
6135 ; GFX90A-NEXT: v_mov_b32_e32 v1, v9
6136 ; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
6137 ; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
6138 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6139 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6141 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_4_4:
6143 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6144 ; GFX940-NEXT: ;;#ASMSTART
6145 ; GFX940-NEXT: ; def v[0:5]
6146 ; GFX940-NEXT: ;;#ASMEND
6147 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
6148 ; GFX940-NEXT: ;;#ASMSTART
6149 ; GFX940-NEXT: ; def v[4:9]
6150 ; GFX940-NEXT: ;;#ASMEND
6151 ; GFX940-NEXT: s_nop 0
6152 ; GFX940-NEXT: v_mov_b32_e32 v4, v6
6153 ; GFX940-NEXT: v_mov_b32_e32 v5, v7
6154 ; GFX940-NEXT: v_mov_b32_e32 v0, v8
6155 ; GFX940-NEXT: v_mov_b32_e32 v1, v9
6156 ; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1
6157 ; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
6158 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6159 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6160 %vec0 = call <3 x i64> asm "; def $0", "=v"()
6161 %vec1 = call <3 x i64> asm "; def $0", "=v"()
6162 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 4, i32 4>
6163 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 2, 4, 4> over two <3 x i64> inputs: lane 0 reads element 2
; of %vec1 (mask index 5 = 3 + 2), lane 1 reads element 2 of %vec0, and lanes
; 2-3 read element 1 of %vec1. Both inputs are produced by opaque inline asm
; with a "=v" output, and the <4 x i64> result is stored through the inreg
; global pointer %ptr.
6167 define void @v_shuffle_v4i64_v3i64__5_2_4_4(ptr addrspace(1) inreg %ptr) {
6168 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_4_4:
6170 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6171 ; GFX900-NEXT: ;;#ASMSTART
6172 ; GFX900-NEXT: ; def v[0:5]
6173 ; GFX900-NEXT: ;;#ASMEND
6174 ; GFX900-NEXT: ;;#ASMSTART
6175 ; GFX900-NEXT: ; def v[6:11]
6176 ; GFX900-NEXT: ;;#ASMEND
6177 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
6178 ; GFX900-NEXT: v_mov_b32_e32 v6, v8
6179 ; GFX900-NEXT: v_mov_b32_e32 v7, v9
6180 ; GFX900-NEXT: v_mov_b32_e32 v2, v10
6181 ; GFX900-NEXT: v_mov_b32_e32 v3, v11
6182 ; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
6183 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
6184 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6185 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6187 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_4_4:
6189 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6190 ; GFX90A-NEXT: ;;#ASMSTART
6191 ; GFX90A-NEXT: ; def v[0:5]
6192 ; GFX90A-NEXT: ;;#ASMEND
6193 ; GFX90A-NEXT: ;;#ASMSTART
6194 ; GFX90A-NEXT: ; def v[6:11]
6195 ; GFX90A-NEXT: ;;#ASMEND
6196 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
6197 ; GFX90A-NEXT: v_mov_b32_e32 v6, v8
6198 ; GFX90A-NEXT: v_mov_b32_e32 v7, v9
6199 ; GFX90A-NEXT: v_mov_b32_e32 v2, v10
6200 ; GFX90A-NEXT: v_mov_b32_e32 v3, v11
6201 ; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
6202 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
6203 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6204 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6206 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_4_4:
6208 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6209 ; GFX940-NEXT: ;;#ASMSTART
6210 ; GFX940-NEXT: ; def v[0:5]
6211 ; GFX940-NEXT: ;;#ASMEND
6212 ; GFX940-NEXT: ;;#ASMSTART
6213 ; GFX940-NEXT: ; def v[6:11]
6214 ; GFX940-NEXT: ;;#ASMEND
6215 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
6216 ; GFX940-NEXT: v_mov_b32_e32 v6, v8
6217 ; GFX940-NEXT: v_mov_b32_e32 v7, v9
6218 ; GFX940-NEXT: v_mov_b32_e32 v2, v10
6219 ; GFX940-NEXT: v_mov_b32_e32 v3, v11
6220 ; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1
6221 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
6222 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6223 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6224 %vec0 = call <3 x i64> asm "; def $0", "=v"()
6225 %vec1 = call <3 x i64> asm "; def $0", "=v"()
6226 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 4, i32 4>
6227 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 3, 4, 4> over two <3 x i64> inputs: lane 0 reads element 2
; of %vec1, lane 1 reads element 0 of %vec1 (mask index 3 = 3 + 0), and lanes
; 2-3 read element 1 of %vec1. No lane selects from %vec0, and the expected
; output below contains a single inline-asm def. The <4 x i64> result is
; stored through the inreg global pointer %ptr.
6231 define void @v_shuffle_v4i64_v3i64__5_3_4_4(ptr addrspace(1) inreg %ptr) {
6232 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_4_4:
6234 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6235 ; GFX900-NEXT: ;;#ASMSTART
6236 ; GFX900-NEXT: ; def v[0:5]
6237 ; GFX900-NEXT: ;;#ASMEND
6238 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
6239 ; GFX900-NEXT: v_mov_b32_e32 v6, v2
6240 ; GFX900-NEXT: v_mov_b32_e32 v7, v3
6241 ; GFX900-NEXT: v_mov_b32_e32 v8, v2
6242 ; GFX900-NEXT: v_mov_b32_e32 v9, v3
6243 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
6244 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
6245 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
6246 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
6247 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
6248 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
6249 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6250 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6252 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_4_4:
6254 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6255 ; GFX90A-NEXT: ;;#ASMSTART
6256 ; GFX90A-NEXT: ; def v[0:5]
6257 ; GFX90A-NEXT: ;;#ASMEND
6258 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
6259 ; GFX90A-NEXT: v_mov_b32_e32 v6, v2
6260 ; GFX90A-NEXT: v_mov_b32_e32 v7, v3
6261 ; GFX90A-NEXT: v_mov_b32_e32 v8, v2
6262 ; GFX90A-NEXT: v_mov_b32_e32 v9, v3
6263 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
6264 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
6265 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
6266 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
6267 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
6268 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
6269 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6270 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6272 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_4_4:
6274 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6275 ; GFX940-NEXT: ;;#ASMSTART
6276 ; GFX940-NEXT: ; def v[0:5]
6277 ; GFX940-NEXT: ;;#ASMEND
6278 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
6279 ; GFX940-NEXT: v_mov_b32_e32 v6, v2
6280 ; GFX940-NEXT: v_mov_b32_e32 v7, v3
6281 ; GFX940-NEXT: v_mov_b32_e32 v8, v2
6282 ; GFX940-NEXT: v_mov_b32_e32 v9, v3
6283 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
6284 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
6285 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
6286 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
6287 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
6288 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1
6289 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6290 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6291 %vec0 = call <3 x i64> asm "; def $0", "=v"()
6292 %vec1 = call <3 x i64> asm "; def $0", "=v"()
6293 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 4, i32 4>
6294 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 5, 4, 4> over two <3 x i64> inputs: lanes 0-1 read element
; 2 of %vec1 (mask index 5 = 3 + 2), and lanes 2-3 read element 1 of %vec1.
; No lane selects from %vec0, and the expected output below contains a single
; inline-asm def. The <4 x i64> result is stored through the inreg global
; pointer %ptr.
6298 define void @v_shuffle_v4i64_v3i64__5_5_4_4(ptr addrspace(1) inreg %ptr) {
6299 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_4:
6301 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6302 ; GFX900-NEXT: ;;#ASMSTART
6303 ; GFX900-NEXT: ; def v[0:5]
6304 ; GFX900-NEXT: ;;#ASMEND
6305 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
6306 ; GFX900-NEXT: v_mov_b32_e32 v0, v2
6307 ; GFX900-NEXT: v_mov_b32_e32 v1, v3
6308 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
6309 ; GFX900-NEXT: s_nop 0
6310 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
6311 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
6312 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
6313 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6314 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6316 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_4:
6318 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6319 ; GFX90A-NEXT: ;;#ASMSTART
6320 ; GFX90A-NEXT: ; def v[0:5]
6321 ; GFX90A-NEXT: ;;#ASMEND
6322 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
6323 ; GFX90A-NEXT: v_mov_b32_e32 v0, v2
6324 ; GFX90A-NEXT: v_mov_b32_e32 v1, v3
6325 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
6326 ; GFX90A-NEXT: s_nop 0
6327 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
6328 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
6329 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
6330 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6331 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6333 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_4:
6335 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6336 ; GFX940-NEXT: ;;#ASMSTART
6337 ; GFX940-NEXT: ; def v[0:5]
6338 ; GFX940-NEXT: ;;#ASMEND
6339 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
6340 ; GFX940-NEXT: v_mov_b32_e32 v0, v2
6341 ; GFX940-NEXT: v_mov_b32_e32 v1, v3
6342 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
6343 ; GFX940-NEXT: s_nop 1
6344 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
6345 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
6346 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
6347 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6348 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6349 %vec0 = call <3 x i64> asm "; def $0", "=v"()
6350 %vec1 = call <3 x i64> asm "; def $0", "=v"()
6351 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 4>
6352 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 5, poison, 4> over two <3 x i64> inputs: lanes 0-1 read
; element 2 of %vec1 (mask index 5 = 3 + 2), lane 2 is left unspecified
; (poison mask entry), and lane 3 reads element 1 of %vec1. No lane selects
; from %vec0, and the expected output below contains a single inline-asm def.
; The <4 x i64> result is stored through the inreg global pointer %ptr.
6356 define void @v_shuffle_v4i64_v3i64__5_5_u_4(ptr addrspace(1) inreg %ptr) {
6357 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_4:
6359 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6360 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
6361 ; GFX900-NEXT: ;;#ASMSTART
6362 ; GFX900-NEXT: ; def v[0:5]
6363 ; GFX900-NEXT: ;;#ASMEND
6364 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
6365 ; GFX900-NEXT: s_nop 0
6366 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
6367 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
6368 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
6369 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6370 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6372 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_4:
6374 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6375 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
6376 ; GFX90A-NEXT: ;;#ASMSTART
6377 ; GFX90A-NEXT: ; def v[0:5]
6378 ; GFX90A-NEXT: ;;#ASMEND
6379 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
6380 ; GFX90A-NEXT: s_nop 0
6381 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
6382 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
6383 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
6384 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6385 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6387 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_4:
6389 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6390 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
6391 ; GFX940-NEXT: ;;#ASMSTART
6392 ; GFX940-NEXT: ; def v[0:5]
6393 ; GFX940-NEXT: ;;#ASMEND
6394 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
6395 ; GFX940-NEXT: s_nop 1
6396 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
6397 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
6398 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
6399 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6400 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6401 %vec0 = call <3 x i64> asm "; def $0", "=v"()
6402 %vec1 = call <3 x i64> asm "; def $0", "=v"()
6403 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 4>
6404 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 5, 0, 4> over two <3 x i64> inputs: lanes 0-1 read element
; 2 of %vec1 (mask index 5 = 3 + 2), lane 2 reads element 0 of %vec0, and
; lane 3 reads element 1 of %vec1. Both inputs are produced by opaque inline
; asm with a "=v" output, and the <4 x i64> result is stored through the
; inreg global pointer %ptr.
6408 define void @v_shuffle_v4i64_v3i64__5_5_0_4(ptr addrspace(1) inreg %ptr) {
6409 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_4:
6411 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6412 ; GFX900-NEXT: ;;#ASMSTART
6413 ; GFX900-NEXT: ; def v[0:5]
6414 ; GFX900-NEXT: ;;#ASMEND
6415 ; GFX900-NEXT: ;;#ASMSTART
6416 ; GFX900-NEXT: ; def v[2:7]
6417 ; GFX900-NEXT: ;;#ASMEND
6418 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
6419 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
6420 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
6421 ; GFX900-NEXT: v_mov_b32_e32 v4, v6
6422 ; GFX900-NEXT: v_mov_b32_e32 v5, v7
6423 ; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
6424 ; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
6425 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6426 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6428 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_4:
6430 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6431 ; GFX90A-NEXT: ;;#ASMSTART
6432 ; GFX90A-NEXT: ; def v[0:5]
6433 ; GFX90A-NEXT: ;;#ASMEND
6434 ; GFX90A-NEXT: ;;#ASMSTART
6435 ; GFX90A-NEXT: ; def v[2:7]
6436 ; GFX90A-NEXT: ;;#ASMEND
6437 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
6438 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
6439 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
6440 ; GFX90A-NEXT: v_mov_b32_e32 v4, v6
6441 ; GFX90A-NEXT: v_mov_b32_e32 v5, v7
6442 ; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
6443 ; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
6444 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6445 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6447 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_4:
6449 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6450 ; GFX940-NEXT: ;;#ASMSTART
6451 ; GFX940-NEXT: ; def v[0:5]
6452 ; GFX940-NEXT: ;;#ASMEND
6453 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
6454 ; GFX940-NEXT: ;;#ASMSTART
6455 ; GFX940-NEXT: ; def v[2:7]
6456 ; GFX940-NEXT: ;;#ASMEND
6457 ; GFX940-NEXT: s_nop 0
6458 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
6459 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
6460 ; GFX940-NEXT: v_mov_b32_e32 v4, v6
6461 ; GFX940-NEXT: v_mov_b32_e32 v5, v7
6462 ; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
6463 ; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
6464 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6465 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6466 %vec0 = call <3 x i64> asm "; def $0", "=v"()
6467 %vec1 = call <3 x i64> asm "; def $0", "=v"()
6468 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 4>
6469 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle mask <5, 5, 1, 4> over two <3 x i64> inputs: lanes 0-1 read element
; 2 of %vec1 (mask index 5 = 3 + 2), lane 2 reads element 1 of %vec0, and
; lane 3 reads element 1 of %vec1. Both inputs are produced by opaque inline
; asm with a "=v" output, and the <4 x i64> result is stored through the
; inreg global pointer %ptr.
6473 define void @v_shuffle_v4i64_v3i64__5_5_1_4(ptr addrspace(1) inreg %ptr) {
6474 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_4:
6476 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6477 ; GFX900-NEXT: ;;#ASMSTART
6478 ; GFX900-NEXT: ; def v[0:5]
6479 ; GFX900-NEXT: ;;#ASMEND
6480 ; GFX900-NEXT: ;;#ASMSTART
6481 ; GFX900-NEXT: ; def v[4:9]
6482 ; GFX900-NEXT: ;;#ASMEND
6483 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
6484 ; GFX900-NEXT: v_mov_b32_e32 v4, v6
6485 ; GFX900-NEXT: v_mov_b32_e32 v5, v7
6486 ; GFX900-NEXT: v_mov_b32_e32 v6, v8
6487 ; GFX900-NEXT: v_mov_b32_e32 v7, v9
6488 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
6489 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
6490 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6491 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6493 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_4:
6495 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6496 ; GFX90A-NEXT: ;;#ASMSTART
6497 ; GFX90A-NEXT: ; def v[0:5]
6498 ; GFX90A-NEXT: ;;#ASMEND
6499 ; GFX90A-NEXT: ;;#ASMSTART
6500 ; GFX90A-NEXT: ; def v[4:9]
6501 ; GFX90A-NEXT: ;;#ASMEND
6502 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
6503 ; GFX90A-NEXT: v_mov_b32_e32 v4, v6
6504 ; GFX90A-NEXT: v_mov_b32_e32 v5, v7
6505 ; GFX90A-NEXT: v_mov_b32_e32 v6, v8
6506 ; GFX90A-NEXT: v_mov_b32_e32 v7, v9
6507 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
6508 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
6509 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6510 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6512 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_4:
6514 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6515 ; GFX940-NEXT: ;;#ASMSTART
6516 ; GFX940-NEXT: ; def v[0:5]
6517 ; GFX940-NEXT: ;;#ASMEND
6518 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
6519 ; GFX940-NEXT: ;;#ASMSTART
6520 ; GFX940-NEXT: ; def v[4:9]
6521 ; GFX940-NEXT: ;;#ASMEND
6522 ; GFX940-NEXT: s_nop 0
6523 ; GFX940-NEXT: v_mov_b32_e32 v4, v6
6524 ; GFX940-NEXT: v_mov_b32_e32 v5, v7
6525 ; GFX940-NEXT: v_mov_b32_e32 v6, v8
6526 ; GFX940-NEXT: v_mov_b32_e32 v7, v9
6527 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
6528 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
6529 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6530 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6531 %vec0 = call <3 x i64> asm "; def $0", "=v"()
6532 %vec1 = call <3 x i64> asm "; def $0", "=v"()
6533 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 4>
6534 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two inline-asm-defined <3 x i64> values with mask <5, 5, 2, 4> and
; store the <4 x i64> result to %ptr. Mask indices 0-2 select from %vec0 and
; 3-5 from %vec1, so the result is { vec1[2], vec1[2], vec0[2], vec1[1] }.
; The expected output stores the upper half (offset:16) first, then rewrites
; two registers of the same tuple before the second store.
6538 define void @v_shuffle_v4i64_v3i64__5_5_2_4(ptr addrspace(1) inreg %ptr) {
6539 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_4:
6541 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6542 ; GFX900-NEXT: ;;#ASMSTART
6543 ; GFX900-NEXT: ; def v[6:11]
6544 ; GFX900-NEXT: ;;#ASMEND
6545 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
6546 ; GFX900-NEXT: ;;#ASMSTART
6547 ; GFX900-NEXT: ; def v[0:5]
6548 ; GFX900-NEXT: ;;#ASMEND
6549 ; GFX900-NEXT: v_mov_b32_e32 v6, v4
6550 ; GFX900-NEXT: v_mov_b32_e32 v7, v5
6551 ; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
6552 ; GFX900-NEXT: s_nop 0
6553 ; GFX900-NEXT: v_mov_b32_e32 v8, v10
6554 ; GFX900-NEXT: v_mov_b32_e32 v9, v11
6555 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
6556 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6557 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6559 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_4:
6561 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6562 ; GFX90A-NEXT: ;;#ASMSTART
6563 ; GFX90A-NEXT: ; def v[6:11]
6564 ; GFX90A-NEXT: ;;#ASMEND
6565 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
6566 ; GFX90A-NEXT: ;;#ASMSTART
6567 ; GFX90A-NEXT: ; def v[0:5]
6568 ; GFX90A-NEXT: ;;#ASMEND
6569 ; GFX90A-NEXT: v_mov_b32_e32 v6, v4
6570 ; GFX90A-NEXT: v_mov_b32_e32 v7, v5
6571 ; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
6572 ; GFX90A-NEXT: s_nop 0
6573 ; GFX90A-NEXT: v_mov_b32_e32 v8, v10
6574 ; GFX90A-NEXT: v_mov_b32_e32 v9, v11
6575 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
6576 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6577 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6579 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_4:
6581 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6582 ; GFX940-NEXT: ;;#ASMSTART
6583 ; GFX940-NEXT: ; def v[6:11]
6584 ; GFX940-NEXT: ;;#ASMEND
6585 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
6586 ; GFX940-NEXT: ;;#ASMSTART
6587 ; GFX940-NEXT: ; def v[0:5]
6588 ; GFX940-NEXT: ;;#ASMEND
6589 ; GFX940-NEXT: s_nop 0
6590 ; GFX940-NEXT: v_mov_b32_e32 v6, v4
6591 ; GFX940-NEXT: v_mov_b32_e32 v7, v5
6592 ; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1
6593 ; GFX940-NEXT: s_nop 1
6594 ; GFX940-NEXT: v_mov_b32_e32 v8, v10
6595 ; GFX940-NEXT: v_mov_b32_e32 v9, v11
6596 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
6597 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6598 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6599 %vec0 = call <3 x i64> asm "; def $0", "=v"()
6600 %vec1 = call <3 x i64> asm "; def $0", "=v"()
6601 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 4>
6602 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two inline-asm-defined <3 x i64> values with mask <5, 5, 3, 4> and
; store the <4 x i64> result to %ptr. Every mask index is >= 3, so only %vec1
; contributes: the result is { vec1[2], vec1[2], vec1[0], vec1[1] }, and the
; expected output contains a single asm def.
6606 define void @v_shuffle_v4i64_v3i64__5_5_3_4(ptr addrspace(1) inreg %ptr) {
6607 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_4:
6609 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6610 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
6611 ; GFX900-NEXT: ;;#ASMSTART
6612 ; GFX900-NEXT: ; def v[0:5]
6613 ; GFX900-NEXT: ;;#ASMEND
6614 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
6615 ; GFX900-NEXT: s_nop 0
6616 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
6617 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
6618 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
6619 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6620 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6622 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_4:
6624 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6625 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
6626 ; GFX90A-NEXT: ;;#ASMSTART
6627 ; GFX90A-NEXT: ; def v[0:5]
6628 ; GFX90A-NEXT: ;;#ASMEND
6629 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
6630 ; GFX90A-NEXT: s_nop 0
6631 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
6632 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
6633 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
6634 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6635 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6637 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_4:
6639 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6640 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
6641 ; GFX940-NEXT: ;;#ASMSTART
6642 ; GFX940-NEXT: ; def v[0:5]
6643 ; GFX940-NEXT: ;;#ASMEND
6644 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
6645 ; GFX940-NEXT: s_nop 1
6646 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
6647 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
6648 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
6649 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6650 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6651 %vec0 = call <3 x i64> asm "; def $0", "=v"()
6652 %vec1 = call <3 x i64> asm "; def $0", "=v"()
6653 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 4>
6654 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two inline-asm-defined <3 x i64> values with mask
; <poison, 5, 5, 5> and store the <4 x i64> result to %ptr. Lane 0 is
; unconstrained and lanes 1-3 are all vec1[2], so only %vec1 is needed; the
; expected output has a single asm def and stores the same v[2:5] tuple to
; both halves.
6658 define void @v_shuffle_v4i64_v3i64__u_5_5_5(ptr addrspace(1) inreg %ptr) {
6659 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__u_5_5_5:
6661 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6662 ; GFX900-NEXT: ;;#ASMSTART
6663 ; GFX900-NEXT: ; def v[0:5]
6664 ; GFX900-NEXT: ;;#ASMEND
6665 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
6666 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
6667 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
6668 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
6669 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
6670 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6671 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6673 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__u_5_5_5:
6675 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6676 ; GFX90A-NEXT: ;;#ASMSTART
6677 ; GFX90A-NEXT: ; def v[0:5]
6678 ; GFX90A-NEXT: ;;#ASMEND
6679 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
6680 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
6681 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
6682 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
6683 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
6684 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6685 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6687 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_5_5_5:
6689 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6690 ; GFX940-NEXT: ;;#ASMSTART
6691 ; GFX940-NEXT: ; def v[0:5]
6692 ; GFX940-NEXT: ;;#ASMEND
6693 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
6694 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
6695 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
6696 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
6697 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
6698 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6699 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6700 %vec0 = call <3 x i64> asm "; def $0", "=v"()
6701 %vec1 = call <3 x i64> asm "; def $0", "=v"()
6702 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
6703 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two inline-asm-defined <3 x i64> values with mask <0, 5, 5, 5> and
; store the <4 x i64> result to %ptr. Mask indices 0-2 select from %vec0 and
; 3-5 from %vec1, so the result is { vec0[0], vec1[2], vec1[2], vec1[2] }.
; The two asm defs in the expected output use overlapping register tuples
; (v[0:5] and v[2:7]).
6707 define void @v_shuffle_v4i64_v3i64__0_5_5_5(ptr addrspace(1) inreg %ptr) {
6708 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_5_5_5:
6710 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6711 ; GFX900-NEXT: ;;#ASMSTART
6712 ; GFX900-NEXT: ; def v[0:5]
6713 ; GFX900-NEXT: ;;#ASMEND
6714 ; GFX900-NEXT: ;;#ASMSTART
6715 ; GFX900-NEXT: ; def v[2:7]
6716 ; GFX900-NEXT: ;;#ASMEND
6717 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
6718 ; GFX900-NEXT: v_mov_b32_e32 v4, v6
6719 ; GFX900-NEXT: v_mov_b32_e32 v5, v7
6720 ; GFX900-NEXT: v_mov_b32_e32 v2, v6
6721 ; GFX900-NEXT: v_mov_b32_e32 v3, v7
6722 ; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
6723 ; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
6724 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6725 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6727 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_5_5_5:
6729 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6730 ; GFX90A-NEXT: ;;#ASMSTART
6731 ; GFX90A-NEXT: ; def v[0:5]
6732 ; GFX90A-NEXT: ;;#ASMEND
6733 ; GFX90A-NEXT: ;;#ASMSTART
6734 ; GFX90A-NEXT: ; def v[2:7]
6735 ; GFX90A-NEXT: ;;#ASMEND
6736 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
6737 ; GFX90A-NEXT: v_mov_b32_e32 v4, v6
6738 ; GFX90A-NEXT: v_mov_b32_e32 v5, v7
6739 ; GFX90A-NEXT: v_mov_b32_e32 v2, v6
6740 ; GFX90A-NEXT: v_mov_b32_e32 v3, v7
6741 ; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
6742 ; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
6743 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6744 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6746 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_5_5_5:
6748 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6749 ; GFX940-NEXT: ;;#ASMSTART
6750 ; GFX940-NEXT: ; def v[0:5]
6751 ; GFX940-NEXT: ;;#ASMEND
6752 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
6753 ; GFX940-NEXT: ;;#ASMSTART
6754 ; GFX940-NEXT: ; def v[2:7]
6755 ; GFX940-NEXT: ;;#ASMEND
6756 ; GFX940-NEXT: s_nop 0
6757 ; GFX940-NEXT: v_mov_b32_e32 v4, v6
6758 ; GFX940-NEXT: v_mov_b32_e32 v5, v7
6759 ; GFX940-NEXT: v_mov_b32_e32 v2, v6
6760 ; GFX940-NEXT: v_mov_b32_e32 v3, v7
6761 ; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1
6762 ; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1
6763 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6764 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6765 %vec0 = call <3 x i64> asm "; def $0", "=v"()
6766 %vec1 = call <3 x i64> asm "; def $0", "=v"()
6767 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
6768 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two inline-asm-defined <3 x i64> values with mask <1, 5, 5, 5> and
; store the <4 x i64> result to %ptr. Mask indices 0-2 select from %vec0 and
; 3-5 from %vec1, so the result is { vec0[1], vec1[2], vec1[2], vec1[2] }.
; The two asm defs in the expected output use overlapping register tuples
; (v[0:5] and v[4:9]).
6772 define void @v_shuffle_v4i64_v3i64__1_5_5_5(ptr addrspace(1) inreg %ptr) {
6773 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_5_5_5:
6775 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6776 ; GFX900-NEXT: ;;#ASMSTART
6777 ; GFX900-NEXT: ; def v[0:5]
6778 ; GFX900-NEXT: ;;#ASMEND
6779 ; GFX900-NEXT: ;;#ASMSTART
6780 ; GFX900-NEXT: ; def v[4:9]
6781 ; GFX900-NEXT: ;;#ASMEND
6782 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
6783 ; GFX900-NEXT: v_mov_b32_e32 v6, v8
6784 ; GFX900-NEXT: v_mov_b32_e32 v7, v9
6785 ; GFX900-NEXT: v_mov_b32_e32 v4, v8
6786 ; GFX900-NEXT: v_mov_b32_e32 v5, v9
6787 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
6788 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
6789 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6790 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6792 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_5_5_5:
6794 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6795 ; GFX90A-NEXT: ;;#ASMSTART
6796 ; GFX90A-NEXT: ; def v[0:5]
6797 ; GFX90A-NEXT: ;;#ASMEND
6798 ; GFX90A-NEXT: ;;#ASMSTART
6799 ; GFX90A-NEXT: ; def v[4:9]
6800 ; GFX90A-NEXT: ;;#ASMEND
6801 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
6802 ; GFX90A-NEXT: v_mov_b32_e32 v6, v8
6803 ; GFX90A-NEXT: v_mov_b32_e32 v7, v9
6804 ; GFX90A-NEXT: v_mov_b32_e32 v4, v8
6805 ; GFX90A-NEXT: v_mov_b32_e32 v5, v9
6806 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
6807 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
6808 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6809 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6811 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_5_5_5:
6813 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6814 ; GFX940-NEXT: ;;#ASMSTART
6815 ; GFX940-NEXT: ; def v[0:5]
6816 ; GFX940-NEXT: ;;#ASMEND
6817 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
6818 ; GFX940-NEXT: ;;#ASMSTART
6819 ; GFX940-NEXT: ; def v[4:9]
6820 ; GFX940-NEXT: ;;#ASMEND
6821 ; GFX940-NEXT: s_nop 0
6822 ; GFX940-NEXT: v_mov_b32_e32 v6, v8
6823 ; GFX940-NEXT: v_mov_b32_e32 v7, v9
6824 ; GFX940-NEXT: v_mov_b32_e32 v4, v8
6825 ; GFX940-NEXT: v_mov_b32_e32 v5, v9
6826 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
6827 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1
6828 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6829 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6830 %vec0 = call <3 x i64> asm "; def $0", "=v"()
6831 %vec1 = call <3 x i64> asm "; def $0", "=v"()
6832 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
6833 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two inline-asm-defined <3 x i64> values with mask <2, 5, 5, 5> and
; store the <4 x i64> result to %ptr. Mask indices 0-2 select from %vec0 and
; 3-5 from %vec1, so the result is { vec0[2], vec1[2], vec1[2], vec1[2] }.
; The expected output stores the upper half (offset:16) first, then
; overwrites v[8:9] of the same tuple before storing the lower half.
6837 define void @v_shuffle_v4i64_v3i64__2_5_5_5(ptr addrspace(1) inreg %ptr) {
6838 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_5_5_5:
6840 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6841 ; GFX900-NEXT: ;;#ASMSTART
6842 ; GFX900-NEXT: ; def v[6:11]
6843 ; GFX900-NEXT: ;;#ASMEND
6844 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
6845 ; GFX900-NEXT: v_mov_b32_e32 v8, v10
6846 ; GFX900-NEXT: v_mov_b32_e32 v9, v11
6847 ; GFX900-NEXT: ;;#ASMSTART
6848 ; GFX900-NEXT: ; def v[0:5]
6849 ; GFX900-NEXT: ;;#ASMEND
6850 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
6851 ; GFX900-NEXT: s_nop 0
6852 ; GFX900-NEXT: v_mov_b32_e32 v8, v4
6853 ; GFX900-NEXT: v_mov_b32_e32 v9, v5
6854 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
6855 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6856 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6858 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_5_5_5:
6860 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6861 ; GFX90A-NEXT: ;;#ASMSTART
6862 ; GFX90A-NEXT: ; def v[6:11]
6863 ; GFX90A-NEXT: ;;#ASMEND
6864 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
6865 ; GFX90A-NEXT: v_mov_b32_e32 v8, v10
6866 ; GFX90A-NEXT: v_mov_b32_e32 v9, v11
6867 ; GFX90A-NEXT: ;;#ASMSTART
6868 ; GFX90A-NEXT: ; def v[0:5]
6869 ; GFX90A-NEXT: ;;#ASMEND
6870 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
6871 ; GFX90A-NEXT: s_nop 0
6872 ; GFX90A-NEXT: v_mov_b32_e32 v8, v4
6873 ; GFX90A-NEXT: v_mov_b32_e32 v9, v5
6874 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
6875 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6876 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6878 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_5_5_5:
6880 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6881 ; GFX940-NEXT: ;;#ASMSTART
6882 ; GFX940-NEXT: ; def v[6:11]
6883 ; GFX940-NEXT: ;;#ASMEND
6884 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
6885 ; GFX940-NEXT: v_mov_b32_e32 v8, v10
6886 ; GFX940-NEXT: v_mov_b32_e32 v9, v11
6887 ; GFX940-NEXT: ;;#ASMSTART
6888 ; GFX940-NEXT: ; def v[0:5]
6889 ; GFX940-NEXT: ;;#ASMEND
6890 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1
6891 ; GFX940-NEXT: s_nop 1
6892 ; GFX940-NEXT: v_mov_b32_e32 v8, v4
6893 ; GFX940-NEXT: v_mov_b32_e32 v9, v5
6894 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
6895 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6896 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6897 %vec0 = call <3 x i64> asm "; def $0", "=v"()
6898 %vec1 = call <3 x i64> asm "; def $0", "=v"()
6899 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
6900 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two inline-asm-defined <3 x i64> values with mask <3, 5, 5, 5> and
; store the <4 x i64> result to %ptr. Every mask index is >= 3, so only %vec1
; contributes: the result is { vec1[0], vec1[2], vec1[2], vec1[2] }, and the
; expected output contains a single asm def.
6904 define void @v_shuffle_v4i64_v3i64__3_5_5_5(ptr addrspace(1) inreg %ptr) {
6905 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__3_5_5_5:
6907 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6908 ; GFX900-NEXT: ;;#ASMSTART
6909 ; GFX900-NEXT: ; def v[0:5]
6910 ; GFX900-NEXT: ;;#ASMEND
6911 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
6912 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
6913 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
6914 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
6915 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
6916 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6917 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6919 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__3_5_5_5:
6921 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6922 ; GFX90A-NEXT: ;;#ASMSTART
6923 ; GFX90A-NEXT: ; def v[0:5]
6924 ; GFX90A-NEXT: ;;#ASMEND
6925 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
6926 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
6927 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
6928 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
6929 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
6930 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6931 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6933 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_5_5_5:
6935 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6936 ; GFX940-NEXT: ;;#ASMSTART
6937 ; GFX940-NEXT: ; def v[0:5]
6938 ; GFX940-NEXT: ;;#ASMEND
6939 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
6940 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
6941 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
6942 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
6943 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
6944 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6945 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6946 %vec0 = call <3 x i64> asm "; def $0", "=v"()
6947 %vec1 = call <3 x i64> asm "; def $0", "=v"()
6948 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
6949 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two inline-asm-defined <3 x i64> values with mask <4, 5, 5, 5> and
; store the <4 x i64> result to %ptr. Every mask index is >= 3, so only %vec1
; contributes: the result is { vec1[1], vec1[2], vec1[2], vec1[2] }, and the
; expected output contains a single asm def.
6953 define void @v_shuffle_v4i64_v3i64__4_5_5_5(ptr addrspace(1) inreg %ptr) {
6954 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_5_5_5:
6956 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6957 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
6958 ; GFX900-NEXT: ;;#ASMSTART
6959 ; GFX900-NEXT: ; def v[0:5]
6960 ; GFX900-NEXT: ;;#ASMEND
6961 ; GFX900-NEXT: v_mov_b32_e32 v6, v4
6962 ; GFX900-NEXT: v_mov_b32_e32 v7, v5
6963 ; GFX900-NEXT: v_mov_b32_e32 v8, v4
6964 ; GFX900-NEXT: v_mov_b32_e32 v9, v5
6965 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
6966 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
6967 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6968 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6970 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_5_5_5:
6972 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6973 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
6974 ; GFX90A-NEXT: ;;#ASMSTART
6975 ; GFX90A-NEXT: ; def v[0:5]
6976 ; GFX90A-NEXT: ;;#ASMEND
6977 ; GFX90A-NEXT: v_mov_b32_e32 v6, v4
6978 ; GFX90A-NEXT: v_mov_b32_e32 v7, v5
6979 ; GFX90A-NEXT: v_mov_b32_e32 v8, v4
6980 ; GFX90A-NEXT: v_mov_b32_e32 v9, v5
6981 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
6982 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
6983 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6984 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6986 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_5_5_5:
6988 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6989 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
6990 ; GFX940-NEXT: ;;#ASMSTART
6991 ; GFX940-NEXT: ; def v[0:5]
6992 ; GFX940-NEXT: ;;#ASMEND
6993 ; GFX940-NEXT: s_nop 0
6994 ; GFX940-NEXT: v_mov_b32_e32 v6, v4
6995 ; GFX940-NEXT: v_mov_b32_e32 v7, v5
6996 ; GFX940-NEXT: v_mov_b32_e32 v8, v4
6997 ; GFX940-NEXT: v_mov_b32_e32 v9, v5
6998 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
6999 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1
7000 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7001 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7002 %vec0 = call <3 x i64> asm "; def $0", "=v"()
7003 %vec1 = call <3 x i64> asm "; def $0", "=v"()
7004 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
7005 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two inline-asm-defined <3 x i64> values with mask
; <5, poison, 5, 5> and store the <4 x i64> result to %ptr. Lane 1 is
; unconstrained and lanes 0, 2 and 3 are all vec1[2], so only %vec1 is
; needed; the expected output contains a single asm def and fills the poison
; lane with the same vec1[2] copy.
7009 define void @v_shuffle_v4i64_v3i64__5_u_5_5(ptr addrspace(1) inreg %ptr) {
7010 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_5_5:
7012 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7013 ; GFX900-NEXT: ;;#ASMSTART
7014 ; GFX900-NEXT: ; def v[0:5]
7015 ; GFX900-NEXT: ;;#ASMEND
7016 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
7017 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
7018 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
7019 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
7020 ; GFX900-NEXT: v_mov_b32_e32 v1, v5
7021 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
7022 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
7023 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7024 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7026 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_5_5:
7028 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7029 ; GFX90A-NEXT: ;;#ASMSTART
7030 ; GFX90A-NEXT: ; def v[0:5]
7031 ; GFX90A-NEXT: ;;#ASMEND
7032 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
7033 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
7034 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
7035 ; GFX90A-NEXT: v_mov_b32_e32 v0, v4
7036 ; GFX90A-NEXT: v_mov_b32_e32 v1, v5
7037 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
7038 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
7039 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7040 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7042 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_5_5:
7044 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7045 ; GFX940-NEXT: ;;#ASMSTART
7046 ; GFX940-NEXT: ; def v[0:5]
7047 ; GFX940-NEXT: ;;#ASMEND
7048 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
7049 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
7050 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
7051 ; GFX940-NEXT: v_mov_b32_e32 v0, v4
7052 ; GFX940-NEXT: v_mov_b32_e32 v1, v5
7053 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
7054 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
7055 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7056 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7057 %vec0 = call <3 x i64> asm "; def $0", "=v"()
7058 %vec1 = call <3 x i64> asm "; def $0", "=v"()
7059 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5>
7060 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two inline-asm-defined <3 x i64> values with mask <5, 0, 5, 5> and
; store the <4 x i64> result to %ptr. Mask indices 0-2 select from %vec0 and
; 3-5 from %vec1, so the result is { vec1[2], vec0[0], vec1[2], vec1[2] }.
; The expected output stores the upper half (offset:16) first and then
; reuses v[4:5] to hold vec0[0] for the lower-half store.
7064 define void @v_shuffle_v4i64_v3i64__5_0_5_5(ptr addrspace(1) inreg %ptr) {
7065 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_5_5:
7067 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7068 ; GFX900-NEXT: ;;#ASMSTART
7069 ; GFX900-NEXT: ; def v[0:5]
7070 ; GFX900-NEXT: ;;#ASMEND
7071 ; GFX900-NEXT: ;;#ASMSTART
7072 ; GFX900-NEXT: ; def v[2:7]
7073 ; GFX900-NEXT: ;;#ASMEND
7074 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
7075 ; GFX900-NEXT: v_mov_b32_e32 v4, v6
7076 ; GFX900-NEXT: v_mov_b32_e32 v5, v7
7077 ; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
7078 ; GFX900-NEXT: v_mov_b32_e32 v2, v6
7079 ; GFX900-NEXT: v_mov_b32_e32 v3, v7
7080 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
7081 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
7082 ; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
7083 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7084 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7086 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_5_5:
7088 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7089 ; GFX90A-NEXT: ;;#ASMSTART
7090 ; GFX90A-NEXT: ; def v[0:5]
7091 ; GFX90A-NEXT: ;;#ASMEND
7092 ; GFX90A-NEXT: ;;#ASMSTART
7093 ; GFX90A-NEXT: ; def v[2:7]
7094 ; GFX90A-NEXT: ;;#ASMEND
7095 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
7096 ; GFX90A-NEXT: v_mov_b32_e32 v4, v6
7097 ; GFX90A-NEXT: v_mov_b32_e32 v5, v7
7098 ; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
7099 ; GFX90A-NEXT: v_mov_b32_e32 v2, v6
7100 ; GFX90A-NEXT: v_mov_b32_e32 v3, v7
7101 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
7102 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
7103 ; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
7104 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7105 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7107 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_5_5:
7109 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7110 ; GFX940-NEXT: ;;#ASMSTART
7111 ; GFX940-NEXT: ; def v[0:5]
7112 ; GFX940-NEXT: ;;#ASMEND
7113 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
7114 ; GFX940-NEXT: ;;#ASMSTART
7115 ; GFX940-NEXT: ; def v[2:7]
7116 ; GFX940-NEXT: ;;#ASMEND
7117 ; GFX940-NEXT: s_nop 0
7118 ; GFX940-NEXT: v_mov_b32_e32 v4, v6
7119 ; GFX940-NEXT: v_mov_b32_e32 v5, v7
7120 ; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1
7121 ; GFX940-NEXT: v_mov_b32_e32 v2, v6
7122 ; GFX940-NEXT: v_mov_b32_e32 v3, v7
7123 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
7124 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
7125 ; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
7126 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7127 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7128 %vec0 = call <3 x i64> asm "; def $0", "=v"()
7129 %vec1 = call <3 x i64> asm "; def $0", "=v"()
7130 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 5, i32 5>
7131 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two inline-asm-defined <3 x i64> values with mask <5, 1, 5, 5> and
; store the <4 x i64> result to %ptr. Mask indices 0-2 select from %vec0 and
; 3-5 from %vec1, so the result is { vec1[2], vec0[1], vec1[2], vec1[2] }.
; The two asm defs in the expected output use overlapping register tuples
; (v[0:5] and v[4:9]).
7135 define void @v_shuffle_v4i64_v3i64__5_1_5_5(ptr addrspace(1) inreg %ptr) {
7136 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_5_5:
7138 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7139 ; GFX900-NEXT: ;;#ASMSTART
7140 ; GFX900-NEXT: ; def v[0:5]
7141 ; GFX900-NEXT: ;;#ASMEND
7142 ; GFX900-NEXT: ;;#ASMSTART
7143 ; GFX900-NEXT: ; def v[4:9]
7144 ; GFX900-NEXT: ;;#ASMEND
7145 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
7146 ; GFX900-NEXT: v_mov_b32_e32 v6, v8
7147 ; GFX900-NEXT: v_mov_b32_e32 v7, v9
7148 ; GFX900-NEXT: v_mov_b32_e32 v0, v8
7149 ; GFX900-NEXT: v_mov_b32_e32 v1, v9
7150 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
7151 ; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
7152 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7153 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7155 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_5_5:
7157 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7158 ; GFX90A-NEXT: ;;#ASMSTART
7159 ; GFX90A-NEXT: ; def v[0:5]
7160 ; GFX90A-NEXT: ;;#ASMEND
7161 ; GFX90A-NEXT: ;;#ASMSTART
7162 ; GFX90A-NEXT: ; def v[4:9]
7163 ; GFX90A-NEXT: ;;#ASMEND
7164 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
7165 ; GFX90A-NEXT: v_mov_b32_e32 v6, v8
7166 ; GFX90A-NEXT: v_mov_b32_e32 v7, v9
7167 ; GFX90A-NEXT: v_mov_b32_e32 v0, v8
7168 ; GFX90A-NEXT: v_mov_b32_e32 v1, v9
7169 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
7170 ; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
7171 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7172 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7174 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_5_5:
7176 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7177 ; GFX940-NEXT: ;;#ASMSTART
7178 ; GFX940-NEXT: ; def v[0:5]
7179 ; GFX940-NEXT: ;;#ASMEND
7180 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
7181 ; GFX940-NEXT: ;;#ASMSTART
7182 ; GFX940-NEXT: ; def v[4:9]
7183 ; GFX940-NEXT: ;;#ASMEND
7184 ; GFX940-NEXT: s_nop 0
7185 ; GFX940-NEXT: v_mov_b32_e32 v6, v8
7186 ; GFX940-NEXT: v_mov_b32_e32 v7, v9
7187 ; GFX940-NEXT: v_mov_b32_e32 v0, v8
7188 ; GFX940-NEXT: v_mov_b32_e32 v1, v9
7189 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
7190 ; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
7191 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7192 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7193 %vec0 = call <3 x i64> asm "; def $0", "=v"()
7194 %vec1 = call <3 x i64> asm "; def $0", "=v"()
7195 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 5, i32 5>
7196 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two inline-asm-defined <3 x i64> values with mask <5, 2, 5, 5> and
; store the <4 x i64> result to %ptr. Mask indices 0-2 select from %vec0 and
; 3-5 from %vec1, so the result is { vec1[2], vec0[2], vec1[2], vec1[2] }.
; The two asm defs in the expected output use disjoint register tuples
; (v[0:5] and v[6:11]).
7200 define void @v_shuffle_v4i64_v3i64__5_2_5_5(ptr addrspace(1) inreg %ptr) {
7201 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_5_5:
7203 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7204 ; GFX900-NEXT: ;;#ASMSTART
7205 ; GFX900-NEXT: ; def v[0:5]
7206 ; GFX900-NEXT: ;;#ASMEND
7207 ; GFX900-NEXT: ;;#ASMSTART
7208 ; GFX900-NEXT: ; def v[6:11]
7209 ; GFX900-NEXT: ;;#ASMEND
7210 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
7211 ; GFX900-NEXT: v_mov_b32_e32 v8, v10
7212 ; GFX900-NEXT: v_mov_b32_e32 v9, v11
7213 ; GFX900-NEXT: v_mov_b32_e32 v2, v10
7214 ; GFX900-NEXT: v_mov_b32_e32 v3, v11
7215 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
7216 ; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
7217 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7218 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7220 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_5_5:
7222 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7223 ; GFX90A-NEXT: ;;#ASMSTART
7224 ; GFX90A-NEXT: ; def v[0:5]
7225 ; GFX90A-NEXT: ;;#ASMEND
7226 ; GFX90A-NEXT: ;;#ASMSTART
7227 ; GFX90A-NEXT: ; def v[6:11]
7228 ; GFX90A-NEXT: ;;#ASMEND
7229 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
7230 ; GFX90A-NEXT: v_mov_b32_e32 v8, v10
7231 ; GFX90A-NEXT: v_mov_b32_e32 v9, v11
7232 ; GFX90A-NEXT: v_mov_b32_e32 v2, v10
7233 ; GFX90A-NEXT: v_mov_b32_e32 v3, v11
7234 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
7235 ; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
7236 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7237 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7239 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_5_5:
7241 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7242 ; GFX940-NEXT: ;;#ASMSTART
7243 ; GFX940-NEXT: ; def v[0:5]
7244 ; GFX940-NEXT: ;;#ASMEND
7245 ; GFX940-NEXT: ;;#ASMSTART
7246 ; GFX940-NEXT: ; def v[6:11]
7247 ; GFX940-NEXT: ;;#ASMEND
7248 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
7249 ; GFX940-NEXT: v_mov_b32_e32 v8, v10
7250 ; GFX940-NEXT: v_mov_b32_e32 v9, v11
7251 ; GFX940-NEXT: v_mov_b32_e32 v2, v10
7252 ; GFX940-NEXT: v_mov_b32_e32 v3, v11
7253 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1
7254 ; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
7255 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7256 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7257 %vec0 = call <3 x i64> asm "; def $0", "=v"()
7258 %vec1 = call <3 x i64> asm "; def $0", "=v"()
7259 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 5, i32 5>
7260 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; Shuffle two inline-asm-defined <3 x i64> values with mask <5, 3, 5, 5> and
; store the <4 x i64> result to %ptr. Every mask index is >= 3, so only %vec1
; contributes: the result is { vec1[2], vec1[0], vec1[2], vec1[2] }, and the
; expected output contains a single asm def. The upper half (offset:16) is
; stored first, then v[4:5] is overwritten with vec1[0] for the second store.
7264 define void @v_shuffle_v4i64_v3i64__5_3_5_5(ptr addrspace(1) inreg %ptr) {
7265 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_5_5:
7267 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7268 ; GFX900-NEXT: ;;#ASMSTART
7269 ; GFX900-NEXT: ; def v[0:5]
7270 ; GFX900-NEXT: ;;#ASMEND
7271 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
7272 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
7273 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
7274 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
7275 ; GFX900-NEXT: s_nop 0
7276 ; GFX900-NEXT: v_mov_b32_e32 v4, v0
7277 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
7278 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
7279 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7280 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7282 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_5_5:
7284 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7285 ; GFX90A-NEXT: ;;#ASMSTART
7286 ; GFX90A-NEXT: ; def v[0:5]
7287 ; GFX90A-NEXT: ;;#ASMEND
7288 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
7289 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
7290 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
7291 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
7292 ; GFX90A-NEXT: s_nop 0
7293 ; GFX90A-NEXT: v_mov_b32_e32 v4, v0
7294 ; GFX90A-NEXT: v_mov_b32_e32 v5, v1
7295 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
7296 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7297 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7299 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_5_5:
7301 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7302 ; GFX940-NEXT: ;;#ASMSTART
7303 ; GFX940-NEXT: ; def v[0:5]
7304 ; GFX940-NEXT: ;;#ASMEND
7305 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
7306 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
7307 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
7308 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
7309 ; GFX940-NEXT: s_nop 1
7310 ; GFX940-NEXT: v_mov_b32_e32 v4, v0
7311 ; GFX940-NEXT: v_mov_b32_e32 v5, v1
7312 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
7313 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7314 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7315 %vec0 = call <3 x i64> asm "; def $0", "=v"()
7316 %vec1 = call <3 x i64> asm "; def $0", "=v"()
7317 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 5, i32 5>
7318 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; VGPR shuffle test ("=v" inputs): mask <5, 4, 5, 5>.
; Every mask index is in the 3-5 range, so only %vec1 is read and a single asm
; def (v[0:5]) is expected. Lanes 0, 2 and 3 replicate %vec1[2] (v[4:5]);
; lane 1 is %vec1[1], which the checks leave in place in v[2:3].
7322 define void @v_shuffle_v4i64_v3i64__5_4_5_5(ptr addrspace(1) inreg %ptr) {
7323 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_5_5:
7325 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7326 ; GFX900-NEXT: ;;#ASMSTART
7327 ; GFX900-NEXT: ; def v[0:5]
7328 ; GFX900-NEXT: ;;#ASMEND
7329 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
7330 ; GFX900-NEXT: v_mov_b32_e32 v6, v4
7331 ; GFX900-NEXT: v_mov_b32_e32 v7, v5
7332 ; GFX900-NEXT: v_mov_b32_e32 v8, v4
7333 ; GFX900-NEXT: v_mov_b32_e32 v9, v5
7334 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
7335 ; GFX900-NEXT: v_mov_b32_e32 v1, v5
7336 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
7337 ; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
7338 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7339 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7341 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_5_5:
7343 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7344 ; GFX90A-NEXT: ;;#ASMSTART
7345 ; GFX90A-NEXT: ; def v[0:5]
7346 ; GFX90A-NEXT: ;;#ASMEND
7347 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
7348 ; GFX90A-NEXT: v_mov_b32_e32 v6, v4
7349 ; GFX90A-NEXT: v_mov_b32_e32 v7, v5
7350 ; GFX90A-NEXT: v_mov_b32_e32 v8, v4
7351 ; GFX90A-NEXT: v_mov_b32_e32 v9, v5
7352 ; GFX90A-NEXT: v_mov_b32_e32 v0, v4
7353 ; GFX90A-NEXT: v_mov_b32_e32 v1, v5
7354 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
7355 ; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
7356 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7357 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7359 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_5_5:
7361 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7362 ; GFX940-NEXT: ;;#ASMSTART
7363 ; GFX940-NEXT: ; def v[0:5]
7364 ; GFX940-NEXT: ;;#ASMEND
7365 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
7366 ; GFX940-NEXT: v_mov_b32_e32 v6, v4
7367 ; GFX940-NEXT: v_mov_b32_e32 v7, v5
7368 ; GFX940-NEXT: v_mov_b32_e32 v8, v4
7369 ; GFX940-NEXT: v_mov_b32_e32 v9, v5
7370 ; GFX940-NEXT: v_mov_b32_e32 v0, v4
7371 ; GFX940-NEXT: v_mov_b32_e32 v1, v5
7372 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
7373 ; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
7374 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7375 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7376 %vec0 = call <3 x i64> asm "; def $0", "=v"()
7377 %vec1 = call <3 x i64> asm "; def $0", "=v"()
7378 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5>
7379 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; VGPR shuffle test ("=v" inputs): mask <5, 5, poison, 5>.
; Only %vec1[2] is selected (lane 2 is poison), so a single asm def (v[0:5])
; is expected. The checks store v[2:5] unchanged for the upper half, then
; overwrite v[2:3] with v[4:5] so the lower half holds %vec1[2] twice.
7383 define void @v_shuffle_v4i64_v3i64__5_5_u_5(ptr addrspace(1) inreg %ptr) {
7384 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_5:
7386 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7387 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
7388 ; GFX900-NEXT: ;;#ASMSTART
7389 ; GFX900-NEXT: ; def v[0:5]
7390 ; GFX900-NEXT: ;;#ASMEND
7391 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
7392 ; GFX900-NEXT: s_nop 0
7393 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
7394 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
7395 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
7396 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7397 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7399 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_5:
7401 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7402 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
7403 ; GFX90A-NEXT: ;;#ASMSTART
7404 ; GFX90A-NEXT: ; def v[0:5]
7405 ; GFX90A-NEXT: ;;#ASMEND
7406 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
7407 ; GFX90A-NEXT: s_nop 0
7408 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
7409 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
7410 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
7411 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7412 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7414 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_5:
7416 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7417 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
7418 ; GFX940-NEXT: ;;#ASMSTART
7419 ; GFX940-NEXT: ; def v[0:5]
7420 ; GFX940-NEXT: ;;#ASMEND
7421 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
7422 ; GFX940-NEXT: s_nop 1
7423 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
7424 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
7425 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
7426 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7427 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7428 %vec0 = call <3 x i64> asm "; def $0", "=v"()
7429 %vec1 = call <3 x i64> asm "; def $0", "=v"()
7430 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 5>
7431 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; VGPR shuffle test ("=v" inputs): mask <5, 5, 0, 5>.
; Lane 2 takes %vec0[0]; lanes 0, 1 and 3 replicate %vec1[2]. Both operands
; are read, so two asm defs are expected. In the checks the second def range
; (v[2:7]) overlaps the first (v[0:5]); only v[0:1] of the first is used
; afterwards.
7435 define void @v_shuffle_v4i64_v3i64__5_5_0_5(ptr addrspace(1) inreg %ptr) {
7436 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_5:
7438 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7439 ; GFX900-NEXT: ;;#ASMSTART
7440 ; GFX900-NEXT: ; def v[0:5]
7441 ; GFX900-NEXT: ;;#ASMEND
7442 ; GFX900-NEXT: ;;#ASMSTART
7443 ; GFX900-NEXT: ; def v[2:7]
7444 ; GFX900-NEXT: ;;#ASMEND
7445 ; GFX900-NEXT: v_mov_b32_e32 v8, 0
7446 ; GFX900-NEXT: v_mov_b32_e32 v2, v6
7447 ; GFX900-NEXT: v_mov_b32_e32 v3, v7
7448 ; GFX900-NEXT: v_mov_b32_e32 v4, v6
7449 ; GFX900-NEXT: v_mov_b32_e32 v5, v7
7450 ; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
7451 ; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
7452 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7453 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7455 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_5:
7457 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7458 ; GFX90A-NEXT: ;;#ASMSTART
7459 ; GFX90A-NEXT: ; def v[0:5]
7460 ; GFX90A-NEXT: ;;#ASMEND
7461 ; GFX90A-NEXT: ;;#ASMSTART
7462 ; GFX90A-NEXT: ; def v[2:7]
7463 ; GFX90A-NEXT: ;;#ASMEND
7464 ; GFX90A-NEXT: v_mov_b32_e32 v8, 0
7465 ; GFX90A-NEXT: v_mov_b32_e32 v2, v6
7466 ; GFX90A-NEXT: v_mov_b32_e32 v3, v7
7467 ; GFX90A-NEXT: v_mov_b32_e32 v4, v6
7468 ; GFX90A-NEXT: v_mov_b32_e32 v5, v7
7469 ; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
7470 ; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
7471 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7472 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7474 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_5:
7476 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7477 ; GFX940-NEXT: ;;#ASMSTART
7478 ; GFX940-NEXT: ; def v[0:5]
7479 ; GFX940-NEXT: ;;#ASMEND
7480 ; GFX940-NEXT: v_mov_b32_e32 v8, 0
7481 ; GFX940-NEXT: ;;#ASMSTART
7482 ; GFX940-NEXT: ; def v[2:7]
7483 ; GFX940-NEXT: ;;#ASMEND
7484 ; GFX940-NEXT: s_nop 0
7485 ; GFX940-NEXT: v_mov_b32_e32 v2, v6
7486 ; GFX940-NEXT: v_mov_b32_e32 v3, v7
7487 ; GFX940-NEXT: v_mov_b32_e32 v4, v6
7488 ; GFX940-NEXT: v_mov_b32_e32 v5, v7
7489 ; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
7490 ; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
7491 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7492 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7493 %vec0 = call <3 x i64> asm "; def $0", "=v"()
7494 %vec1 = call <3 x i64> asm "; def $0", "=v"()
7495 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 5>
7496 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; VGPR shuffle test ("=v" inputs): mask <5, 5, 1, 5>.
; Lane 2 takes %vec0[1]; lanes 0, 1 and 3 replicate %vec1[2]. Both operands
; are read, so two asm defs are expected. In the checks the second def range
; (v[4:9]) overlaps the first (v[0:5]); only v[2:3] of the first is used
; afterwards.
7500 define void @v_shuffle_v4i64_v3i64__5_5_1_5(ptr addrspace(1) inreg %ptr) {
7501 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_5:
7503 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7504 ; GFX900-NEXT: ;;#ASMSTART
7505 ; GFX900-NEXT: ; def v[0:5]
7506 ; GFX900-NEXT: ;;#ASMEND
7507 ; GFX900-NEXT: ;;#ASMSTART
7508 ; GFX900-NEXT: ; def v[4:9]
7509 ; GFX900-NEXT: ;;#ASMEND
7510 ; GFX900-NEXT: v_mov_b32_e32 v10, 0
7511 ; GFX900-NEXT: v_mov_b32_e32 v4, v8
7512 ; GFX900-NEXT: v_mov_b32_e32 v5, v9
7513 ; GFX900-NEXT: v_mov_b32_e32 v6, v8
7514 ; GFX900-NEXT: v_mov_b32_e32 v7, v9
7515 ; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
7516 ; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
7517 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7518 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7520 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_5:
7522 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7523 ; GFX90A-NEXT: ;;#ASMSTART
7524 ; GFX90A-NEXT: ; def v[0:5]
7525 ; GFX90A-NEXT: ;;#ASMEND
7526 ; GFX90A-NEXT: ;;#ASMSTART
7527 ; GFX90A-NEXT: ; def v[4:9]
7528 ; GFX90A-NEXT: ;;#ASMEND
7529 ; GFX90A-NEXT: v_mov_b32_e32 v10, 0
7530 ; GFX90A-NEXT: v_mov_b32_e32 v4, v8
7531 ; GFX90A-NEXT: v_mov_b32_e32 v5, v9
7532 ; GFX90A-NEXT: v_mov_b32_e32 v6, v8
7533 ; GFX90A-NEXT: v_mov_b32_e32 v7, v9
7534 ; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
7535 ; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
7536 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7537 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7539 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_5:
7541 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7542 ; GFX940-NEXT: ;;#ASMSTART
7543 ; GFX940-NEXT: ; def v[0:5]
7544 ; GFX940-NEXT: ;;#ASMEND
7545 ; GFX940-NEXT: v_mov_b32_e32 v10, 0
7546 ; GFX940-NEXT: ;;#ASMSTART
7547 ; GFX940-NEXT: ; def v[4:9]
7548 ; GFX940-NEXT: ;;#ASMEND
7549 ; GFX940-NEXT: s_nop 0
7550 ; GFX940-NEXT: v_mov_b32_e32 v4, v8
7551 ; GFX940-NEXT: v_mov_b32_e32 v5, v9
7552 ; GFX940-NEXT: v_mov_b32_e32 v6, v8
7553 ; GFX940-NEXT: v_mov_b32_e32 v7, v9
7554 ; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
7555 ; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
7556 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7557 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7558 %vec0 = call <3 x i64> asm "; def $0", "=v"()
7559 %vec1 = call <3 x i64> asm "; def $0", "=v"()
7560 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 5>
7561 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; VGPR shuffle test ("=v" inputs): mask <5, 5, 2, 5>.
; Lane 2 takes %vec0[2]; lanes 0, 1 and 3 replicate %vec1[2]. Both operands
; are read, so two asm defs are expected, here in disjoint ranges (v[6:11] and
; v[0:5]). The checks build the upper half in v[8:11], store it, then rewrite
; v[8:9] from v[10:11] for the lower half.
7565 define void @v_shuffle_v4i64_v3i64__5_5_2_5(ptr addrspace(1) inreg %ptr) {
7566 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_5:
7568 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7569 ; GFX900-NEXT: ;;#ASMSTART
7570 ; GFX900-NEXT: ; def v[6:11]
7571 ; GFX900-NEXT: ;;#ASMEND
7572 ; GFX900-NEXT: v_mov_b32_e32 v12, 0
7573 ; GFX900-NEXT: ;;#ASMSTART
7574 ; GFX900-NEXT: ; def v[0:5]
7575 ; GFX900-NEXT: ;;#ASMEND
7576 ; GFX900-NEXT: v_mov_b32_e32 v8, v4
7577 ; GFX900-NEXT: v_mov_b32_e32 v9, v5
7578 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
7579 ; GFX900-NEXT: s_nop 0
7580 ; GFX900-NEXT: v_mov_b32_e32 v8, v10
7581 ; GFX900-NEXT: v_mov_b32_e32 v9, v11
7582 ; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
7583 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7584 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7586 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_5:
7588 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7589 ; GFX90A-NEXT: ;;#ASMSTART
7590 ; GFX90A-NEXT: ; def v[6:11]
7591 ; GFX90A-NEXT: ;;#ASMEND
7592 ; GFX90A-NEXT: v_mov_b32_e32 v12, 0
7593 ; GFX90A-NEXT: ;;#ASMSTART
7594 ; GFX90A-NEXT: ; def v[0:5]
7595 ; GFX90A-NEXT: ;;#ASMEND
7596 ; GFX90A-NEXT: v_mov_b32_e32 v8, v4
7597 ; GFX90A-NEXT: v_mov_b32_e32 v9, v5
7598 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
7599 ; GFX90A-NEXT: s_nop 0
7600 ; GFX90A-NEXT: v_mov_b32_e32 v8, v10
7601 ; GFX90A-NEXT: v_mov_b32_e32 v9, v11
7602 ; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
7603 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7604 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7606 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_5:
7608 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7609 ; GFX940-NEXT: ;;#ASMSTART
7610 ; GFX940-NEXT: ; def v[6:11]
7611 ; GFX940-NEXT: ;;#ASMEND
7612 ; GFX940-NEXT: v_mov_b32_e32 v12, 0
7613 ; GFX940-NEXT: ;;#ASMSTART
7614 ; GFX940-NEXT: ; def v[0:5]
7615 ; GFX940-NEXT: ;;#ASMEND
7616 ; GFX940-NEXT: s_nop 0
7617 ; GFX940-NEXT: v_mov_b32_e32 v8, v4
7618 ; GFX940-NEXT: v_mov_b32_e32 v9, v5
7619 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1
7620 ; GFX940-NEXT: s_nop 1
7621 ; GFX940-NEXT: v_mov_b32_e32 v8, v10
7622 ; GFX940-NEXT: v_mov_b32_e32 v9, v11
7623 ; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
7624 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7625 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7626 %vec0 = call <3 x i64> asm "; def $0", "=v"()
7627 %vec1 = call <3 x i64> asm "; def $0", "=v"()
7628 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 5>
7629 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; VGPR shuffle test ("=v" inputs): mask <5, 5, 3, 5>.
; Every mask index is in the 3-5 range, so only %vec1 is read and a single asm
; def (v[0:5]) is expected. Lane 2 is %vec1[0] (v[0:1]); the other lanes
; replicate %vec1[2]. After copying v[4:5] into v[2:3], the checks store the
; overlapping windows v[0:3] (upper half) and v[2:5] (lower half).
7633 define void @v_shuffle_v4i64_v3i64__5_5_3_5(ptr addrspace(1) inreg %ptr) {
7634 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_5:
7636 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7637 ; GFX900-NEXT: ;;#ASMSTART
7638 ; GFX900-NEXT: ; def v[0:5]
7639 ; GFX900-NEXT: ;;#ASMEND
7640 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
7641 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
7642 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
7643 ; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
7644 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
7645 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7646 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7648 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_5:
7650 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7651 ; GFX90A-NEXT: ;;#ASMSTART
7652 ; GFX90A-NEXT: ; def v[0:5]
7653 ; GFX90A-NEXT: ;;#ASMEND
7654 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
7655 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
7656 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
7657 ; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
7658 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
7659 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7660 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7662 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_5:
7664 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7665 ; GFX940-NEXT: ;;#ASMSTART
7666 ; GFX940-NEXT: ; def v[0:5]
7667 ; GFX940-NEXT: ;;#ASMEND
7668 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
7669 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
7670 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
7671 ; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
7672 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
7673 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7674 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7675 %vec0 = call <3 x i64> asm "; def $0", "=v"()
7676 %vec1 = call <3 x i64> asm "; def $0", "=v"()
7677 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 5>
7678 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; VGPR shuffle test ("=v" inputs): mask <5, 5, 4, 5>.
; Every mask index is in the 3-5 range, so only %vec1 is read and a single asm
; def (v[0:5]) is expected. The upper half <%vec1[1], %vec1[2]> is already
; laid out in v[2:5] and is stored as-is; v[2:3] is then overwritten with
; v[4:5] so the lower half holds %vec1[2] twice.
7682 define void @v_shuffle_v4i64_v3i64__5_5_4_5(ptr addrspace(1) inreg %ptr) {
7683 ; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_5:
7685 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7686 ; GFX900-NEXT: v_mov_b32_e32 v6, 0
7687 ; GFX900-NEXT: ;;#ASMSTART
7688 ; GFX900-NEXT: ; def v[0:5]
7689 ; GFX900-NEXT: ;;#ASMEND
7690 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
7691 ; GFX900-NEXT: s_nop 0
7692 ; GFX900-NEXT: v_mov_b32_e32 v2, v4
7693 ; GFX900-NEXT: v_mov_b32_e32 v3, v5
7694 ; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
7695 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7696 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7698 ; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_5:
7700 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7701 ; GFX90A-NEXT: v_mov_b32_e32 v6, 0
7702 ; GFX90A-NEXT: ;;#ASMSTART
7703 ; GFX90A-NEXT: ; def v[0:5]
7704 ; GFX90A-NEXT: ;;#ASMEND
7705 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
7706 ; GFX90A-NEXT: s_nop 0
7707 ; GFX90A-NEXT: v_mov_b32_e32 v2, v4
7708 ; GFX90A-NEXT: v_mov_b32_e32 v3, v5
7709 ; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
7710 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7711 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7713 ; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_5:
7715 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7716 ; GFX940-NEXT: v_mov_b32_e32 v6, 0
7717 ; GFX940-NEXT: ;;#ASMSTART
7718 ; GFX940-NEXT: ; def v[0:5]
7719 ; GFX940-NEXT: ;;#ASMEND
7720 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
7721 ; GFX940-NEXT: s_nop 1
7722 ; GFX940-NEXT: v_mov_b32_e32 v2, v4
7723 ; GFX940-NEXT: v_mov_b32_e32 v3, v5
7724 ; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
7725 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7726 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7727 %vec0 = call <3 x i64> asm "; def $0", "=v"()
7728 %vec1 = call <3 x i64> asm "; def $0", "=v"()
7729 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 5>
7730 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
; SGPR shuffle test ("=s" input, result consumed by asm pinned to s[8:15]):
; the mask is entirely poison, so no input lane is read. All three subtargets
; share one expected sequence, which contains only the "use" asm and no def.
7734 define void @s_shuffle_v4i64_v3i64__u_u_u_u() {
7735 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_u_u_u:
7737 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7738 ; GFX9-NEXT: ;;#ASMSTART
7739 ; GFX9-NEXT: ; use s[8:15]
7740 ; GFX9-NEXT: ;;#ASMEND
7741 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7742 %vec0 = call <3 x i64> asm "; def $0", "=s"()
7743 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> poison
7744 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test ("=s" input, result consumed by asm pinned to s[8:15]):
; mask <0, poison, poison, poison>, so only lane 0 = %vec0[0] is defined.
; The checks expect the def to land directly in s[8:13], so no copies are
; needed before the use; gfx940 additionally expects an s_nop 0 in between.
7748 define void @s_shuffle_v4i64_v3i64__0_u_u_u() {
7749 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_u_u_u:
7751 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7752 ; GFX900-NEXT: ;;#ASMSTART
7753 ; GFX900-NEXT: ; def s[8:13]
7754 ; GFX900-NEXT: ;;#ASMEND
7755 ; GFX900-NEXT: ;;#ASMSTART
7756 ; GFX900-NEXT: ; use s[8:15]
7757 ; GFX900-NEXT: ;;#ASMEND
7758 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7760 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_u_u_u:
7762 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7763 ; GFX90A-NEXT: ;;#ASMSTART
7764 ; GFX90A-NEXT: ; def s[8:13]
7765 ; GFX90A-NEXT: ;;#ASMEND
7766 ; GFX90A-NEXT: ;;#ASMSTART
7767 ; GFX90A-NEXT: ; use s[8:15]
7768 ; GFX90A-NEXT: ;;#ASMEND
7769 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7771 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__0_u_u_u:
7773 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7774 ; GFX940-NEXT: ;;#ASMSTART
7775 ; GFX940-NEXT: ; def s[8:13]
7776 ; GFX940-NEXT: ;;#ASMEND
7777 ; GFX940-NEXT: s_nop 0
7778 ; GFX940-NEXT: ;;#ASMSTART
7779 ; GFX940-NEXT: ; use s[8:15]
7780 ; GFX940-NEXT: ;;#ASMEND
7781 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7782 %vec0 = call <3 x i64> asm "; def $0", "=s"()
7783 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
7784 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test ("=s" input, result consumed by asm pinned to s[8:15]):
; mask <1, poison, poison, poison>, so only lane 0 = %vec0[1] is defined.
; The checks copy the second 64-bit element of the def into s[8:9]
; (from s[6:7] on gfx900/gfx90a, from s[2:3] on gfx940).
7788 define void @s_shuffle_v4i64_v3i64__1_u_u_u() {
7789 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_u_u_u:
7791 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7792 ; GFX900-NEXT: ;;#ASMSTART
7793 ; GFX900-NEXT: ; def s[4:9]
7794 ; GFX900-NEXT: ;;#ASMEND
7795 ; GFX900-NEXT: s_mov_b32 s8, s6
7796 ; GFX900-NEXT: s_mov_b32 s9, s7
7797 ; GFX900-NEXT: ;;#ASMSTART
7798 ; GFX900-NEXT: ; use s[8:15]
7799 ; GFX900-NEXT: ;;#ASMEND
7800 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7802 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_u_u_u:
7804 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7805 ; GFX90A-NEXT: ;;#ASMSTART
7806 ; GFX90A-NEXT: ; def s[4:9]
7807 ; GFX90A-NEXT: ;;#ASMEND
7808 ; GFX90A-NEXT: s_mov_b32 s8, s6
7809 ; GFX90A-NEXT: s_mov_b32 s9, s7
7810 ; GFX90A-NEXT: ;;#ASMSTART
7811 ; GFX90A-NEXT: ; use s[8:15]
7812 ; GFX90A-NEXT: ;;#ASMEND
7813 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7815 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_u_u_u:
7817 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7818 ; GFX940-NEXT: ;;#ASMSTART
7819 ; GFX940-NEXT: ; def s[0:5]
7820 ; GFX940-NEXT: ;;#ASMEND
7821 ; GFX940-NEXT: s_mov_b32 s8, s2
7822 ; GFX940-NEXT: s_mov_b32 s9, s3
7823 ; GFX940-NEXT: ;;#ASMSTART
7824 ; GFX940-NEXT: ; use s[8:15]
7825 ; GFX940-NEXT: ;;#ASMEND
7826 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7827 %vec0 = call <3 x i64> asm "; def $0", "=s"()
7828 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
7829 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test ("=s" input, result consumed by asm pinned to s[8:15]):
; mask <2, poison, poison, poison>, so only lane 0 = %vec0[2] is defined.
; On gfx900/gfx90a the def is expected in s[4:9], which already leaves
; element 2 in s[8:9], so no copy appears; gfx940 defs s[0:5] and copies
; s[4:5] into s[8:9].
7833 define void @s_shuffle_v4i64_v3i64__2_u_u_u() {
7834 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_u_u_u:
7836 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7837 ; GFX900-NEXT: ;;#ASMSTART
7838 ; GFX900-NEXT: ; def s[4:9]
7839 ; GFX900-NEXT: ;;#ASMEND
7840 ; GFX900-NEXT: ;;#ASMSTART
7841 ; GFX900-NEXT: ; use s[8:15]
7842 ; GFX900-NEXT: ;;#ASMEND
7843 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7845 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_u_u_u:
7847 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7848 ; GFX90A-NEXT: ;;#ASMSTART
7849 ; GFX90A-NEXT: ; def s[4:9]
7850 ; GFX90A-NEXT: ;;#ASMEND
7851 ; GFX90A-NEXT: ;;#ASMSTART
7852 ; GFX90A-NEXT: ; use s[8:15]
7853 ; GFX90A-NEXT: ;;#ASMEND
7854 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7856 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_u_u_u:
7858 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7859 ; GFX940-NEXT: ;;#ASMSTART
7860 ; GFX940-NEXT: ; def s[0:5]
7861 ; GFX940-NEXT: ;;#ASMEND
7862 ; GFX940-NEXT: s_mov_b32 s8, s4
7863 ; GFX940-NEXT: s_mov_b32 s9, s5
7864 ; GFX940-NEXT: ;;#ASMSTART
7865 ; GFX940-NEXT: ; use s[8:15]
7866 ; GFX940-NEXT: ;;#ASMEND
7867 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7868 %vec0 = call <3 x i64> asm "; def $0", "=s"()
7869 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
7870 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test ("=s" input, result consumed by asm pinned to s[8:15]):
; mask <3, poison, poison, poison>. Index 3 addresses element 0 of the second
; shuffle operand, which is poison in this function, so no lane of %vec0 is
; read. All three subtargets share one expected sequence with no def.
7874 define void @s_shuffle_v4i64_v3i64__3_u_u_u() {
7875 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_u_u_u:
7877 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7878 ; GFX9-NEXT: ;;#ASMSTART
7879 ; GFX9-NEXT: ; use s[8:15]
7880 ; GFX9-NEXT: ;;#ASMEND
7881 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7882 %vec0 = call <3 x i64> asm "; def $0", "=s"()
7883 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
7884 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test ("=s" inputs, result consumed by asm pinned to s[8:15]):
; mask <4, poison, poison, poison>, so only lane 0 = %vec1[1] is defined and
; %vec0 is not read; a single def is expected. The checks copy the second
; 64-bit element of that def into s[8:9] (from s[6:7] on gfx900/gfx90a, from
; s[2:3] on gfx940).
7888 define void @s_shuffle_v4i64_v3i64__4_u_u_u() {
7889 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_u_u_u:
7891 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7892 ; GFX900-NEXT: ;;#ASMSTART
7893 ; GFX900-NEXT: ; def s[4:9]
7894 ; GFX900-NEXT: ;;#ASMEND
7895 ; GFX900-NEXT: s_mov_b32 s8, s6
7896 ; GFX900-NEXT: s_mov_b32 s9, s7
7897 ; GFX900-NEXT: ;;#ASMSTART
7898 ; GFX900-NEXT: ; use s[8:15]
7899 ; GFX900-NEXT: ;;#ASMEND
7900 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7902 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_u_u_u:
7904 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7905 ; GFX90A-NEXT: ;;#ASMSTART
7906 ; GFX90A-NEXT: ; def s[4:9]
7907 ; GFX90A-NEXT: ;;#ASMEND
7908 ; GFX90A-NEXT: s_mov_b32 s8, s6
7909 ; GFX90A-NEXT: s_mov_b32 s9, s7
7910 ; GFX90A-NEXT: ;;#ASMSTART
7911 ; GFX90A-NEXT: ; use s[8:15]
7912 ; GFX90A-NEXT: ;;#ASMEND
7913 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7915 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_u_u_u:
7917 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7918 ; GFX940-NEXT: ;;#ASMSTART
7919 ; GFX940-NEXT: ; def s[0:5]
7920 ; GFX940-NEXT: ;;#ASMEND
7921 ; GFX940-NEXT: s_mov_b32 s8, s2
7922 ; GFX940-NEXT: s_mov_b32 s9, s3
7923 ; GFX940-NEXT: ;;#ASMSTART
7924 ; GFX940-NEXT: ; use s[8:15]
7925 ; GFX940-NEXT: ;;#ASMEND
7926 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7927 %vec0 = call <3 x i64> asm "; def $0", "=s"()
7928 %vec1 = call <3 x i64> asm "; def $0", "=s"()
7929 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison>
7930 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test ("=s" inputs, result consumed by asm pinned to s[8:15]):
; mask <5, poison, poison, poison>, so only lane 0 = %vec1[2] is defined and
; %vec0 is not read; a single def is expected. On gfx900/gfx90a the def in
; s[4:9] already leaves element 2 in s[8:9], so no copy appears; gfx940 defs
; s[0:5] and copies s[4:5] into s[8:9].
7934 define void @s_shuffle_v4i64_v3i64__5_u_u_u() {
7935 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_u_u:
7937 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7938 ; GFX900-NEXT: ;;#ASMSTART
7939 ; GFX900-NEXT: ; def s[4:9]
7940 ; GFX900-NEXT: ;;#ASMEND
7941 ; GFX900-NEXT: ;;#ASMSTART
7942 ; GFX900-NEXT: ; use s[8:15]
7943 ; GFX900-NEXT: ;;#ASMEND
7944 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7946 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_u_u:
7948 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7949 ; GFX90A-NEXT: ;;#ASMSTART
7950 ; GFX90A-NEXT: ; def s[4:9]
7951 ; GFX90A-NEXT: ;;#ASMEND
7952 ; GFX90A-NEXT: ;;#ASMSTART
7953 ; GFX90A-NEXT: ; use s[8:15]
7954 ; GFX90A-NEXT: ;;#ASMEND
7955 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7957 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_u_u:
7959 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7960 ; GFX940-NEXT: ;;#ASMSTART
7961 ; GFX940-NEXT: ; def s[0:5]
7962 ; GFX940-NEXT: ;;#ASMEND
7963 ; GFX940-NEXT: s_mov_b32 s8, s4
7964 ; GFX940-NEXT: s_mov_b32 s9, s5
7965 ; GFX940-NEXT: ;;#ASMSTART
7966 ; GFX940-NEXT: ; use s[8:15]
7967 ; GFX940-NEXT: ;;#ASMEND
7968 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7969 %vec0 = call <3 x i64> asm "; def $0", "=s"()
7970 %vec1 = call <3 x i64> asm "; def $0", "=s"()
7971 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison>
7972 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test ("=s" inputs, result consumed by asm pinned to s[8:15]):
; mask <5, 0, poison, poison>: lane 0 = %vec1[2], lane 1 = %vec0[0].
; Both operands are read, so two asm defs are expected. The two defs print
; identically apart from their register ranges, so the checks identify them
; only by range.
7976 define void @s_shuffle_v4i64_v3i64__5_0_u_u() {
7977 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_u_u:
7979 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7980 ; GFX900-NEXT: ;;#ASMSTART
7981 ; GFX900-NEXT: ; def s[4:9]
7982 ; GFX900-NEXT: ;;#ASMEND
7983 ; GFX900-NEXT: ;;#ASMSTART
7984 ; GFX900-NEXT: ; def s[8:13]
7985 ; GFX900-NEXT: ;;#ASMEND
7986 ; GFX900-NEXT: s_mov_b32 s8, s12
7987 ; GFX900-NEXT: s_mov_b32 s9, s13
7988 ; GFX900-NEXT: s_mov_b32 s10, s4
7989 ; GFX900-NEXT: s_mov_b32 s11, s5
7990 ; GFX900-NEXT: ;;#ASMSTART
7991 ; GFX900-NEXT: ; use s[8:15]
7992 ; GFX900-NEXT: ;;#ASMEND
7993 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7995 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_u_u:
7997 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7998 ; GFX90A-NEXT: ;;#ASMSTART
7999 ; GFX90A-NEXT: ; def s[4:9]
8000 ; GFX90A-NEXT: ;;#ASMEND
8001 ; GFX90A-NEXT: ;;#ASMSTART
8002 ; GFX90A-NEXT: ; def s[8:13]
8003 ; GFX90A-NEXT: ;;#ASMEND
8004 ; GFX90A-NEXT: s_mov_b32 s8, s12
8005 ; GFX90A-NEXT: s_mov_b32 s9, s13
8006 ; GFX90A-NEXT: s_mov_b32 s10, s4
8007 ; GFX90A-NEXT: s_mov_b32 s11, s5
8008 ; GFX90A-NEXT: ;;#ASMSTART
8009 ; GFX90A-NEXT: ; use s[8:15]
8010 ; GFX90A-NEXT: ;;#ASMEND
8011 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8013 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_u_u:
8015 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8016 ; GFX940-NEXT: ;;#ASMSTART
8017 ; GFX940-NEXT: ; def s[0:5]
8018 ; GFX940-NEXT: ;;#ASMEND
8019 ; GFX940-NEXT: s_mov_b32 s10, s0
8020 ; GFX940-NEXT: ;;#ASMSTART
8021 ; GFX940-NEXT: ; def s[4:9]
8022 ; GFX940-NEXT: ;;#ASMEND
8023 ; GFX940-NEXT: s_mov_b32 s11, s1
8024 ; GFX940-NEXT: ;;#ASMSTART
8025 ; GFX940-NEXT: ; use s[8:15]
8026 ; GFX940-NEXT: ;;#ASMEND
8027 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8028 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8029 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8030 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 poison, i32 poison>
8031 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test ("=s" inputs, result consumed by asm pinned to s[8:15]):
; mask <5, 1, poison, poison>: lane 0 = %vec1[2], lane 1 = %vec0[1].
; Both operands are read, so two asm defs are expected. On gfx900/gfx90a the
; second def range (s[4:9]) overlaps the first (s[8:13]) in s[8:9], so both
; lanes are already in place and no copies appear; gfx940 uses s[0:5] for the
; second def and copies s[4:5] into s[8:9].
8035 define void @s_shuffle_v4i64_v3i64__5_1_u_u() {
8036 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_u_u:
8038 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8039 ; GFX900-NEXT: ;;#ASMSTART
8040 ; GFX900-NEXT: ; def s[8:13]
8041 ; GFX900-NEXT: ;;#ASMEND
8042 ; GFX900-NEXT: ;;#ASMSTART
8043 ; GFX900-NEXT: ; def s[4:9]
8044 ; GFX900-NEXT: ;;#ASMEND
8045 ; GFX900-NEXT: ;;#ASMSTART
8046 ; GFX900-NEXT: ; use s[8:15]
8047 ; GFX900-NEXT: ;;#ASMEND
8048 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8050 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_u_u:
8052 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8053 ; GFX90A-NEXT: ;;#ASMSTART
8054 ; GFX90A-NEXT: ; def s[8:13]
8055 ; GFX90A-NEXT: ;;#ASMEND
8056 ; GFX90A-NEXT: ;;#ASMSTART
8057 ; GFX90A-NEXT: ; def s[4:9]
8058 ; GFX90A-NEXT: ;;#ASMEND
8059 ; GFX90A-NEXT: ;;#ASMSTART
8060 ; GFX90A-NEXT: ; use s[8:15]
8061 ; GFX90A-NEXT: ;;#ASMEND
8062 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8064 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_u_u:
8066 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8067 ; GFX940-NEXT: ;;#ASMSTART
8068 ; GFX940-NEXT: ; def s[8:13]
8069 ; GFX940-NEXT: ;;#ASMEND
8070 ; GFX940-NEXT: ;;#ASMSTART
8071 ; GFX940-NEXT: ; def s[0:5]
8072 ; GFX940-NEXT: ;;#ASMEND
8073 ; GFX940-NEXT: s_mov_b32 s8, s4
8074 ; GFX940-NEXT: s_mov_b32 s9, s5
8075 ; GFX940-NEXT: ;;#ASMSTART
8076 ; GFX940-NEXT: ; use s[8:15]
8077 ; GFX940-NEXT: ;;#ASMEND
8078 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8079 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8080 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8081 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 poison, i32 poison>
8082 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test ("=s" inputs, result consumed by asm pinned to s[8:15]):
; mask <5, 2, poison, poison>: lane 0 = %vec1[2], lane 1 = %vec0[2].
; Both operands are read, so two asm defs are expected.
; Review note: on gfx900/gfx90a lane 0 (s[8:9]) comes from the second printed
; def and lane 1 from the first, whereas the gfx940 sequence takes lane 0 from
; the first printed def (s[12:13]) and lane 1 from the second (s[4:5]). The
; two defs print identically, so this is presumably just a different emission
; order of the two asm statements - TODO confirm when regenerating checks.
8086 define void @s_shuffle_v4i64_v3i64__5_2_u_u() {
8087 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_u_u:
8089 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8090 ; GFX900-NEXT: ;;#ASMSTART
8091 ; GFX900-NEXT: ; def s[8:13]
8092 ; GFX900-NEXT: ;;#ASMEND
8093 ; GFX900-NEXT: ;;#ASMSTART
8094 ; GFX900-NEXT: ; def s[4:9]
8095 ; GFX900-NEXT: ;;#ASMEND
8096 ; GFX900-NEXT: s_mov_b32 s10, s12
8097 ; GFX900-NEXT: s_mov_b32 s11, s13
8098 ; GFX900-NEXT: ;;#ASMSTART
8099 ; GFX900-NEXT: ; use s[8:15]
8100 ; GFX900-NEXT: ;;#ASMEND
8101 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8103 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_u_u:
8105 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8106 ; GFX90A-NEXT: ;;#ASMSTART
8107 ; GFX90A-NEXT: ; def s[8:13]
8108 ; GFX90A-NEXT: ;;#ASMEND
8109 ; GFX90A-NEXT: ;;#ASMSTART
8110 ; GFX90A-NEXT: ; def s[4:9]
8111 ; GFX90A-NEXT: ;;#ASMEND
8112 ; GFX90A-NEXT: s_mov_b32 s10, s12
8113 ; GFX90A-NEXT: s_mov_b32 s11, s13
8114 ; GFX90A-NEXT: ;;#ASMSTART
8115 ; GFX90A-NEXT: ; use s[8:15]
8116 ; GFX90A-NEXT: ;;#ASMEND
8117 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8119 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_u_u:
8121 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8122 ; GFX940-NEXT: ;;#ASMSTART
8123 ; GFX940-NEXT: ; def s[8:13]
8124 ; GFX940-NEXT: ;;#ASMEND
8125 ; GFX940-NEXT: ;;#ASMSTART
8126 ; GFX940-NEXT: ; def s[0:5]
8127 ; GFX940-NEXT: ;;#ASMEND
8128 ; GFX940-NEXT: s_mov_b32 s8, s12
8129 ; GFX940-NEXT: s_mov_b32 s9, s13
8130 ; GFX940-NEXT: s_mov_b32 s10, s4
8131 ; GFX940-NEXT: s_mov_b32 s11, s5
8132 ; GFX940-NEXT: ;;#ASMSTART
8133 ; GFX940-NEXT: ; use s[8:15]
8134 ; GFX940-NEXT: ;;#ASMEND
8135 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8136 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8137 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8138 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 poison, i32 poison>
8139 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test ("=s" inputs, result consumed by asm pinned to s[8:15]):
; mask <5, 3, poison, poison>: lane 0 = %vec1[2], lane 1 = %vec1[0].
; Only %vec1 is read, so a single def is expected. On gfx900/gfx90a the def in
; s[4:9] already leaves element 2 in s[8:9], so only element 0 is copied into
; s[10:11]; gfx940 defs s[0:5] and copies both lanes.
8143 define void @s_shuffle_v4i64_v3i64__5_3_u_u() {
8144 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u:
8146 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8147 ; GFX900-NEXT: ;;#ASMSTART
8148 ; GFX900-NEXT: ; def s[4:9]
8149 ; GFX900-NEXT: ;;#ASMEND
8150 ; GFX900-NEXT: s_mov_b32 s10, s4
8151 ; GFX900-NEXT: s_mov_b32 s11, s5
8152 ; GFX900-NEXT: ;;#ASMSTART
8153 ; GFX900-NEXT: ; use s[8:15]
8154 ; GFX900-NEXT: ;;#ASMEND
8155 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8157 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u:
8159 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8160 ; GFX90A-NEXT: ;;#ASMSTART
8161 ; GFX90A-NEXT: ; def s[4:9]
8162 ; GFX90A-NEXT: ;;#ASMEND
8163 ; GFX90A-NEXT: s_mov_b32 s10, s4
8164 ; GFX90A-NEXT: s_mov_b32 s11, s5
8165 ; GFX90A-NEXT: ;;#ASMSTART
8166 ; GFX90A-NEXT: ; use s[8:15]
8167 ; GFX90A-NEXT: ;;#ASMEND
8168 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8170 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u:
8172 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8173 ; GFX940-NEXT: ;;#ASMSTART
8174 ; GFX940-NEXT: ; def s[0:5]
8175 ; GFX940-NEXT: ;;#ASMEND
8176 ; GFX940-NEXT: s_mov_b32 s8, s4
8177 ; GFX940-NEXT: s_mov_b32 s9, s5
8178 ; GFX940-NEXT: s_mov_b32 s10, s0
8179 ; GFX940-NEXT: s_mov_b32 s11, s1
8180 ; GFX940-NEXT: ;;#ASMSTART
8181 ; GFX940-NEXT: ; use s[8:15]
8182 ; GFX940-NEXT: ;;#ASMEND
8183 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8184 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8185 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8186 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 poison, i32 poison>
8187 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffle of two <3 x i64> inline-asm ("=s") values with mask <5, 4, poison, poison>:
; result lane 0 = %vec1[2], lane 1 = %vec1[1]; lanes 2 and 3 are poison.
; %vec0 is not selected by the mask, and the expected output shows one "; def".
; The same expected sequence is shared by every llc target this file exercises.
; Check lines are autogenerated (see the file's first line); regenerate, do not hand-edit.
8191 define void @s_shuffle_v4i64_v3i64__5_4_u_u() {
8192 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_u_u:
8194 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8195 ; GFX9-NEXT: ;;#ASMSTART
8196 ; GFX9-NEXT: ; def s[8:13]
8197 ; GFX9-NEXT: ;;#ASMEND
8198 ; GFX9-NEXT: s_mov_b32 s8, s12
8199 ; GFX9-NEXT: s_mov_b32 s9, s13
8200 ; GFX9-NEXT: ;;#ASMSTART
8201 ; GFX9-NEXT: ; use s[8:15]
8202 ; GFX9-NEXT: ;;#ASMEND
8203 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8204 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8205 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8206 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison>
8207 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffle of two <3 x i64> inline-asm ("=s") values with mask <5, 5, poison, poison>:
; result lanes 0 and 1 both = %vec1[2]; lanes 2 and 3 are poison.
; %vec0 is not selected by the mask, and the expected output shows one "; def".
; The result is handed to inline asm pinned to s[8:15].
; Check lines are autogenerated (see the file's first line); regenerate, do not hand-edit.
8211 define void @s_shuffle_v4i64_v3i64__5_5_u_u() {
8212 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u:
8214 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8215 ; GFX900-NEXT: ;;#ASMSTART
8216 ; GFX900-NEXT: ; def s[8:13]
8217 ; GFX900-NEXT: ;;#ASMEND
8218 ; GFX900-NEXT: s_mov_b32 s8, s12
8219 ; GFX900-NEXT: s_mov_b32 s9, s13
8220 ; GFX900-NEXT: s_mov_b32 s10, s12
8221 ; GFX900-NEXT: s_mov_b32 s11, s13
8222 ; GFX900-NEXT: ;;#ASMSTART
8223 ; GFX900-NEXT: ; use s[8:15]
8224 ; GFX900-NEXT: ;;#ASMEND
8225 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8227 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u:
8229 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8230 ; GFX90A-NEXT: ;;#ASMSTART
8231 ; GFX90A-NEXT: ; def s[8:13]
8232 ; GFX90A-NEXT: ;;#ASMEND
8233 ; GFX90A-NEXT: s_mov_b32 s8, s12
8234 ; GFX90A-NEXT: s_mov_b32 s9, s13
8235 ; GFX90A-NEXT: s_mov_b32 s10, s12
8236 ; GFX90A-NEXT: s_mov_b32 s11, s13
8237 ; GFX90A-NEXT: ;;#ASMSTART
8238 ; GFX90A-NEXT: ; use s[8:15]
8239 ; GFX90A-NEXT: ;;#ASMEND
8240 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8242 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u:
8244 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8245 ; GFX940-NEXT: ;;#ASMSTART
8246 ; GFX940-NEXT: ; def s[0:5]
8247 ; GFX940-NEXT: ;;#ASMEND
8248 ; GFX940-NEXT: s_mov_b32 s8, s4
8249 ; GFX940-NEXT: s_mov_b32 s9, s5
8250 ; GFX940-NEXT: s_mov_b32 s10, s4
8251 ; GFX940-NEXT: s_mov_b32 s11, s5
8252 ; GFX940-NEXT: ;;#ASMSTART
8253 ; GFX940-NEXT: ; use s[8:15]
8254 ; GFX940-NEXT: ;;#ASMEND
8255 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8256 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8257 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8258 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison>
8259 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffle of two <3 x i64> inline-asm ("=s") values with mask <5, 5, 0, poison>:
; result lanes 0 and 1 = %vec1[2], lane 2 = %vec0[0]; lane 3 is poison.
; Both sources are selected, and the expected output shows two "; def" regions.
; The result is handed to inline asm pinned to s[8:15].
; Check lines are autogenerated (see the file's first line); regenerate, do not hand-edit.
8263 define void @s_shuffle_v4i64_v3i64__5_5_0_u() {
8264 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_u:
8266 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8267 ; GFX900-NEXT: ;;#ASMSTART
8268 ; GFX900-NEXT: ; def s[4:9]
8269 ; GFX900-NEXT: ;;#ASMEND
8270 ; GFX900-NEXT: ;;#ASMSTART
8271 ; GFX900-NEXT: ; def s[8:13]
8272 ; GFX900-NEXT: ;;#ASMEND
8273 ; GFX900-NEXT: s_mov_b32 s8, s12
8274 ; GFX900-NEXT: s_mov_b32 s9, s13
8275 ; GFX900-NEXT: s_mov_b32 s10, s12
8276 ; GFX900-NEXT: s_mov_b32 s11, s13
8277 ; GFX900-NEXT: s_mov_b32 s12, s4
8278 ; GFX900-NEXT: s_mov_b32 s13, s5
8279 ; GFX900-NEXT: ;;#ASMSTART
8280 ; GFX900-NEXT: ; use s[8:15]
8281 ; GFX900-NEXT: ;;#ASMEND
8282 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8284 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_u:
8286 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8287 ; GFX90A-NEXT: ;;#ASMSTART
8288 ; GFX90A-NEXT: ; def s[4:9]
8289 ; GFX90A-NEXT: ;;#ASMEND
8290 ; GFX90A-NEXT: ;;#ASMSTART
8291 ; GFX90A-NEXT: ; def s[8:13]
8292 ; GFX90A-NEXT: ;;#ASMEND
8293 ; GFX90A-NEXT: s_mov_b32 s8, s12
8294 ; GFX90A-NEXT: s_mov_b32 s9, s13
8295 ; GFX90A-NEXT: s_mov_b32 s10, s12
8296 ; GFX90A-NEXT: s_mov_b32 s11, s13
8297 ; GFX90A-NEXT: s_mov_b32 s12, s4
8298 ; GFX90A-NEXT: s_mov_b32 s13, s5
8299 ; GFX90A-NEXT: ;;#ASMSTART
8300 ; GFX90A-NEXT: ; use s[8:15]
8301 ; GFX90A-NEXT: ;;#ASMEND
8302 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8304 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_u:
8306 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8307 ; GFX940-NEXT: ;;#ASMSTART
8308 ; GFX940-NEXT: ; def s[8:13]
8309 ; GFX940-NEXT: ;;#ASMEND
8310 ; GFX940-NEXT: ;;#ASMSTART
8311 ; GFX940-NEXT: ; def s[0:5]
8312 ; GFX940-NEXT: ;;#ASMEND
8313 ; GFX940-NEXT: s_mov_b32 s8, s12
8314 ; GFX940-NEXT: s_mov_b32 s9, s13
8315 ; GFX940-NEXT: s_mov_b32 s10, s12
8316 ; GFX940-NEXT: s_mov_b32 s11, s13
8317 ; GFX940-NEXT: s_mov_b32 s12, s0
8318 ; GFX940-NEXT: s_mov_b32 s13, s1
8319 ; GFX940-NEXT: ;;#ASMSTART
8320 ; GFX940-NEXT: ; use s[8:15]
8321 ; GFX940-NEXT: ;;#ASMEND
8322 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8323 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8324 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8325 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 poison>
8326 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffle of two <3 x i64> inline-asm ("=s") values with mask <5, 5, 1, poison>:
; result lanes 0 and 1 = %vec1[2], lane 2 = %vec0[1]; lane 3 is poison.
; Both sources are selected, and the expected output shows two "; def" regions.
; The result is handed to inline asm pinned to s[8:15].
; Check lines are autogenerated (see the file's first line); regenerate, do not hand-edit.
8330 define void @s_shuffle_v4i64_v3i64__5_5_1_u() {
8331 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_u:
8333 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8334 ; GFX900-NEXT: ;;#ASMSTART
8335 ; GFX900-NEXT: ; def s[4:9]
8336 ; GFX900-NEXT: ;;#ASMEND
8337 ; GFX900-NEXT: ;;#ASMSTART
8338 ; GFX900-NEXT: ; def s[8:13]
8339 ; GFX900-NEXT: ;;#ASMEND
8340 ; GFX900-NEXT: s_mov_b32 s8, s12
8341 ; GFX900-NEXT: s_mov_b32 s9, s13
8342 ; GFX900-NEXT: s_mov_b32 s10, s12
8343 ; GFX900-NEXT: s_mov_b32 s11, s13
8344 ; GFX900-NEXT: s_mov_b32 s12, s6
8345 ; GFX900-NEXT: s_mov_b32 s13, s7
8346 ; GFX900-NEXT: ;;#ASMSTART
8347 ; GFX900-NEXT: ; use s[8:15]
8348 ; GFX900-NEXT: ;;#ASMEND
8349 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8351 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_u:
8353 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8354 ; GFX90A-NEXT: ;;#ASMSTART
8355 ; GFX90A-NEXT: ; def s[4:9]
8356 ; GFX90A-NEXT: ;;#ASMEND
8357 ; GFX90A-NEXT: ;;#ASMSTART
8358 ; GFX90A-NEXT: ; def s[8:13]
8359 ; GFX90A-NEXT: ;;#ASMEND
8360 ; GFX90A-NEXT: s_mov_b32 s8, s12
8361 ; GFX90A-NEXT: s_mov_b32 s9, s13
8362 ; GFX90A-NEXT: s_mov_b32 s10, s12
8363 ; GFX90A-NEXT: s_mov_b32 s11, s13
8364 ; GFX90A-NEXT: s_mov_b32 s12, s6
8365 ; GFX90A-NEXT: s_mov_b32 s13, s7
8366 ; GFX90A-NEXT: ;;#ASMSTART
8367 ; GFX90A-NEXT: ; use s[8:15]
8368 ; GFX90A-NEXT: ;;#ASMEND
8369 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8371 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_u:
8373 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8374 ; GFX940-NEXT: ;;#ASMSTART
8375 ; GFX940-NEXT: ; def s[8:13]
8376 ; GFX940-NEXT: ;;#ASMEND
8377 ; GFX940-NEXT: ;;#ASMSTART
8378 ; GFX940-NEXT: ; def s[0:5]
8379 ; GFX940-NEXT: ;;#ASMEND
8380 ; GFX940-NEXT: s_mov_b32 s8, s12
8381 ; GFX940-NEXT: s_mov_b32 s9, s13
8382 ; GFX940-NEXT: s_mov_b32 s10, s12
8383 ; GFX940-NEXT: s_mov_b32 s11, s13
8384 ; GFX940-NEXT: s_mov_b32 s12, s2
8385 ; GFX940-NEXT: s_mov_b32 s13, s3
8386 ; GFX940-NEXT: ;;#ASMSTART
8387 ; GFX940-NEXT: ; use s[8:15]
8388 ; GFX940-NEXT: ;;#ASMEND
8389 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8390 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8391 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8392 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 poison>
8393 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffle of two <3 x i64> inline-asm ("=s") values with mask <5, 5, 2, poison>:
; result lanes 0 and 1 = %vec1[2], lane 2 = %vec0[2]; lane 3 is poison.
; Both sources are selected, and the expected output shows two "; def" regions.
; The result is handed to inline asm pinned to s[8:15].
; Check lines are autogenerated (see the file's first line); regenerate, do not hand-edit.
8397 define void @s_shuffle_v4i64_v3i64__5_5_2_u() {
8398 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_u:
8400 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8401 ; GFX900-NEXT: ;;#ASMSTART
8402 ; GFX900-NEXT: ; def s[8:13]
8403 ; GFX900-NEXT: ;;#ASMEND
8404 ; GFX900-NEXT: ;;#ASMSTART
8405 ; GFX900-NEXT: ; def s[16:21]
8406 ; GFX900-NEXT: ;;#ASMEND
8407 ; GFX900-NEXT: s_mov_b32 s8, s20
8408 ; GFX900-NEXT: s_mov_b32 s9, s21
8409 ; GFX900-NEXT: s_mov_b32 s10, s20
8410 ; GFX900-NEXT: s_mov_b32 s11, s21
8411 ; GFX900-NEXT: ;;#ASMSTART
8412 ; GFX900-NEXT: ; use s[8:15]
8413 ; GFX900-NEXT: ;;#ASMEND
8414 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8416 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_u:
8418 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8419 ; GFX90A-NEXT: ;;#ASMSTART
8420 ; GFX90A-NEXT: ; def s[8:13]
8421 ; GFX90A-NEXT: ;;#ASMEND
8422 ; GFX90A-NEXT: ;;#ASMSTART
8423 ; GFX90A-NEXT: ; def s[16:21]
8424 ; GFX90A-NEXT: ;;#ASMEND
8425 ; GFX90A-NEXT: s_mov_b32 s8, s20
8426 ; GFX90A-NEXT: s_mov_b32 s9, s21
8427 ; GFX90A-NEXT: s_mov_b32 s10, s20
8428 ; GFX90A-NEXT: s_mov_b32 s11, s21
8429 ; GFX90A-NEXT: ;;#ASMSTART
8430 ; GFX90A-NEXT: ; use s[8:15]
8431 ; GFX90A-NEXT: ;;#ASMEND
8432 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8434 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_u:
8436 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8437 ; GFX940-NEXT: ;;#ASMSTART
8438 ; GFX940-NEXT: ; def s[8:13]
8439 ; GFX940-NEXT: ;;#ASMEND
8440 ; GFX940-NEXT: ;;#ASMSTART
8441 ; GFX940-NEXT: ; def s[0:5]
8442 ; GFX940-NEXT: ;;#ASMEND
8443 ; GFX940-NEXT: s_mov_b32 s8, s4
8444 ; GFX940-NEXT: s_mov_b32 s9, s5
8445 ; GFX940-NEXT: s_mov_b32 s10, s4
8446 ; GFX940-NEXT: s_mov_b32 s11, s5
8447 ; GFX940-NEXT: ;;#ASMSTART
8448 ; GFX940-NEXT: ; use s[8:15]
8449 ; GFX940-NEXT: ;;#ASMEND
8450 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8451 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8452 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8453 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 poison>
8454 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffle of two <3 x i64> inline-asm ("=s") values with mask <5, 5, 3, poison>:
; result lanes 0 and 1 = %vec1[2], lane 2 = %vec1[0]; lane 3 is poison.
; %vec0 is not selected by the mask, and the expected output shows one "; def".
; The result is handed to inline asm pinned to s[8:15].
; Check lines are autogenerated (see the file's first line); regenerate, do not hand-edit.
8458 define void @s_shuffle_v4i64_v3i64__5_5_3_u() {
8459 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u:
8461 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8462 ; GFX900-NEXT: ;;#ASMSTART
8463 ; GFX900-NEXT: ; def s[12:17]
8464 ; GFX900-NEXT: ;;#ASMEND
8465 ; GFX900-NEXT: s_mov_b32 s8, s16
8466 ; GFX900-NEXT: s_mov_b32 s9, s17
8467 ; GFX900-NEXT: s_mov_b32 s10, s16
8468 ; GFX900-NEXT: s_mov_b32 s11, s17
8469 ; GFX900-NEXT: ;;#ASMSTART
8470 ; GFX900-NEXT: ; use s[8:15]
8471 ; GFX900-NEXT: ;;#ASMEND
8472 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8474 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u:
8476 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8477 ; GFX90A-NEXT: ;;#ASMSTART
8478 ; GFX90A-NEXT: ; def s[12:17]
8479 ; GFX90A-NEXT: ;;#ASMEND
8480 ; GFX90A-NEXT: s_mov_b32 s8, s16
8481 ; GFX90A-NEXT: s_mov_b32 s9, s17
8482 ; GFX90A-NEXT: s_mov_b32 s10, s16
8483 ; GFX90A-NEXT: s_mov_b32 s11, s17
8484 ; GFX90A-NEXT: ;;#ASMSTART
8485 ; GFX90A-NEXT: ; use s[8:15]
8486 ; GFX90A-NEXT: ;;#ASMEND
8487 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8489 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u:
8491 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8492 ; GFX940-NEXT: ;;#ASMSTART
8493 ; GFX940-NEXT: ; def s[0:5]
8494 ; GFX940-NEXT: ;;#ASMEND
8495 ; GFX940-NEXT: s_mov_b32 s8, s4
8496 ; GFX940-NEXT: s_mov_b32 s9, s5
8497 ; GFX940-NEXT: s_mov_b32 s10, s4
8498 ; GFX940-NEXT: s_mov_b32 s11, s5
8499 ; GFX940-NEXT: s_mov_b32 s12, s0
8500 ; GFX940-NEXT: s_mov_b32 s13, s1
8501 ; GFX940-NEXT: ;;#ASMSTART
8502 ; GFX940-NEXT: ; use s[8:15]
8503 ; GFX940-NEXT: ;;#ASMEND
8504 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8505 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8506 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8507 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 poison>
8508 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffle of two <3 x i64> inline-asm ("=s") values with mask <5, 5, 4, poison>:
; result lanes 0 and 1 = %vec1[2], lane 2 = %vec1[1]; lane 3 is poison.
; %vec0 is not selected by the mask, and the expected output shows one "; def".
; The result is handed to inline asm pinned to s[8:15].
; Check lines are autogenerated (see the file's first line); regenerate, do not hand-edit.
8512 define void @s_shuffle_v4i64_v3i64__5_5_4_u() {
8513 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u:
8515 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8516 ; GFX900-NEXT: ;;#ASMSTART
8517 ; GFX900-NEXT: ; def s[12:17]
8518 ; GFX900-NEXT: ;;#ASMEND
8519 ; GFX900-NEXT: s_mov_b32 s8, s16
8520 ; GFX900-NEXT: s_mov_b32 s9, s17
8521 ; GFX900-NEXT: s_mov_b32 s10, s16
8522 ; GFX900-NEXT: s_mov_b32 s11, s17
8523 ; GFX900-NEXT: s_mov_b32 s12, s14
8524 ; GFX900-NEXT: s_mov_b32 s13, s15
8525 ; GFX900-NEXT: ;;#ASMSTART
8526 ; GFX900-NEXT: ; use s[8:15]
8527 ; GFX900-NEXT: ;;#ASMEND
8528 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8530 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u:
8532 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8533 ; GFX90A-NEXT: ;;#ASMSTART
8534 ; GFX90A-NEXT: ; def s[12:17]
8535 ; GFX90A-NEXT: ;;#ASMEND
8536 ; GFX90A-NEXT: s_mov_b32 s8, s16
8537 ; GFX90A-NEXT: s_mov_b32 s9, s17
8538 ; GFX90A-NEXT: s_mov_b32 s10, s16
8539 ; GFX90A-NEXT: s_mov_b32 s11, s17
8540 ; GFX90A-NEXT: s_mov_b32 s12, s14
8541 ; GFX90A-NEXT: s_mov_b32 s13, s15
8542 ; GFX90A-NEXT: ;;#ASMSTART
8543 ; GFX90A-NEXT: ; use s[8:15]
8544 ; GFX90A-NEXT: ;;#ASMEND
8545 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8547 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u:
8549 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8550 ; GFX940-NEXT: ;;#ASMSTART
8551 ; GFX940-NEXT: ; def s[0:5]
8552 ; GFX940-NEXT: ;;#ASMEND
8553 ; GFX940-NEXT: s_mov_b32 s8, s4
8554 ; GFX940-NEXT: s_mov_b32 s9, s5
8555 ; GFX940-NEXT: s_mov_b32 s10, s4
8556 ; GFX940-NEXT: s_mov_b32 s11, s5
8557 ; GFX940-NEXT: s_mov_b32 s12, s2
8558 ; GFX940-NEXT: s_mov_b32 s13, s3
8559 ; GFX940-NEXT: ;;#ASMSTART
8560 ; GFX940-NEXT: ; use s[8:15]
8561 ; GFX940-NEXT: ;;#ASMEND
8562 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8563 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8564 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8565 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 poison>
8566 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffle of two <3 x i64> inline-asm ("=s") values with mask <5, 5, 5, poison>:
; result lanes 0, 1 and 2 all = %vec1[2]; lane 3 is poison.
; %vec0 is not selected by the mask, and the expected output shows one "; def".
; The same expected sequence is shared by every llc target this file exercises.
; Check lines are autogenerated (see the file's first line); regenerate, do not hand-edit.
8570 define void @s_shuffle_v4i64_v3i64__5_5_5_u() {
8571 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_5_u:
8573 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8574 ; GFX9-NEXT: ;;#ASMSTART
8575 ; GFX9-NEXT: ; def s[8:13]
8576 ; GFX9-NEXT: ;;#ASMEND
8577 ; GFX9-NEXT: s_mov_b32 s8, s12
8578 ; GFX9-NEXT: s_mov_b32 s9, s13
8579 ; GFX9-NEXT: s_mov_b32 s10, s12
8580 ; GFX9-NEXT: s_mov_b32 s11, s13
8581 ; GFX9-NEXT: ;;#ASMSTART
8582 ; GFX9-NEXT: ; use s[8:15]
8583 ; GFX9-NEXT: ;;#ASMEND
8584 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8585 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8586 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8587 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 poison>
8588 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffle of two <3 x i64> inline-asm ("=s") values with mask <5, 5, 5, 0>:
; result lanes 0, 1 and 2 = %vec1[2], lane 3 = %vec0[0].
; Both sources are selected, and the expected output shows two "; def" regions.
; The result is handed to inline asm pinned to s[8:15].
; Check lines are autogenerated (see the file's first line); regenerate, do not hand-edit.
8592 define void @s_shuffle_v4i64_v3i64__5_5_5_0() {
8593 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_0:
8595 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8596 ; GFX900-NEXT: ;;#ASMSTART
8597 ; GFX900-NEXT: ; def s[4:9]
8598 ; GFX900-NEXT: ;;#ASMEND
8599 ; GFX900-NEXT: ;;#ASMSTART
8600 ; GFX900-NEXT: ; def s[8:13]
8601 ; GFX900-NEXT: ;;#ASMEND
8602 ; GFX900-NEXT: s_mov_b32 s8, s12
8603 ; GFX900-NEXT: s_mov_b32 s9, s13
8604 ; GFX900-NEXT: s_mov_b32 s10, s12
8605 ; GFX900-NEXT: s_mov_b32 s11, s13
8606 ; GFX900-NEXT: s_mov_b32 s14, s4
8607 ; GFX900-NEXT: s_mov_b32 s15, s5
8608 ; GFX900-NEXT: ;;#ASMSTART
8609 ; GFX900-NEXT: ; use s[8:15]
8610 ; GFX900-NEXT: ;;#ASMEND
8611 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8613 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_0:
8615 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8616 ; GFX90A-NEXT: ;;#ASMSTART
8617 ; GFX90A-NEXT: ; def s[4:9]
8618 ; GFX90A-NEXT: ;;#ASMEND
8619 ; GFX90A-NEXT: ;;#ASMSTART
8620 ; GFX90A-NEXT: ; def s[8:13]
8621 ; GFX90A-NEXT: ;;#ASMEND
8622 ; GFX90A-NEXT: s_mov_b32 s8, s12
8623 ; GFX90A-NEXT: s_mov_b32 s9, s13
8624 ; GFX90A-NEXT: s_mov_b32 s10, s12
8625 ; GFX90A-NEXT: s_mov_b32 s11, s13
8626 ; GFX90A-NEXT: s_mov_b32 s14, s4
8627 ; GFX90A-NEXT: s_mov_b32 s15, s5
8628 ; GFX90A-NEXT: ;;#ASMSTART
8629 ; GFX90A-NEXT: ; use s[8:15]
8630 ; GFX90A-NEXT: ;;#ASMEND
8631 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8633 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_0:
8635 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8636 ; GFX940-NEXT: ;;#ASMSTART
8637 ; GFX940-NEXT: ; def s[8:13]
8638 ; GFX940-NEXT: ;;#ASMEND
8639 ; GFX940-NEXT: ;;#ASMSTART
8640 ; GFX940-NEXT: ; def s[0:5]
8641 ; GFX940-NEXT: ;;#ASMEND
8642 ; GFX940-NEXT: s_mov_b32 s8, s12
8643 ; GFX940-NEXT: s_mov_b32 s9, s13
8644 ; GFX940-NEXT: s_mov_b32 s10, s12
8645 ; GFX940-NEXT: s_mov_b32 s11, s13
8646 ; GFX940-NEXT: s_mov_b32 s14, s0
8647 ; GFX940-NEXT: s_mov_b32 s15, s1
8648 ; GFX940-NEXT: ;;#ASMSTART
8649 ; GFX940-NEXT: ; use s[8:15]
8650 ; GFX940-NEXT: ;;#ASMEND
8651 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8652 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8653 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8654 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 0>
8655 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffle of two <3 x i64> inline-asm ("=s") values with mask <5, 5, 5, 1>:
; result lanes 0, 1 and 2 = %vec1[2], lane 3 = %vec0[1].
; Both sources are selected, and the expected output shows two "; def" regions.
; The result is handed to inline asm pinned to s[8:15].
; Check lines are autogenerated (see the file's first line); regenerate, do not hand-edit.
8659 define void @s_shuffle_v4i64_v3i64__5_5_5_1() {
8660 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_1:
8662 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8663 ; GFX900-NEXT: ;;#ASMSTART
8664 ; GFX900-NEXT: ; def s[4:9]
8665 ; GFX900-NEXT: ;;#ASMEND
8666 ; GFX900-NEXT: ;;#ASMSTART
8667 ; GFX900-NEXT: ; def s[8:13]
8668 ; GFX900-NEXT: ;;#ASMEND
8669 ; GFX900-NEXT: s_mov_b32 s8, s12
8670 ; GFX900-NEXT: s_mov_b32 s9, s13
8671 ; GFX900-NEXT: s_mov_b32 s10, s12
8672 ; GFX900-NEXT: s_mov_b32 s11, s13
8673 ; GFX900-NEXT: s_mov_b32 s14, s6
8674 ; GFX900-NEXT: s_mov_b32 s15, s7
8675 ; GFX900-NEXT: ;;#ASMSTART
8676 ; GFX900-NEXT: ; use s[8:15]
8677 ; GFX900-NEXT: ;;#ASMEND
8678 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8680 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_1:
8682 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8683 ; GFX90A-NEXT: ;;#ASMSTART
8684 ; GFX90A-NEXT: ; def s[4:9]
8685 ; GFX90A-NEXT: ;;#ASMEND
8686 ; GFX90A-NEXT: ;;#ASMSTART
8687 ; GFX90A-NEXT: ; def s[8:13]
8688 ; GFX90A-NEXT: ;;#ASMEND
8689 ; GFX90A-NEXT: s_mov_b32 s8, s12
8690 ; GFX90A-NEXT: s_mov_b32 s9, s13
8691 ; GFX90A-NEXT: s_mov_b32 s10, s12
8692 ; GFX90A-NEXT: s_mov_b32 s11, s13
8693 ; GFX90A-NEXT: s_mov_b32 s14, s6
8694 ; GFX90A-NEXT: s_mov_b32 s15, s7
8695 ; GFX90A-NEXT: ;;#ASMSTART
8696 ; GFX90A-NEXT: ; use s[8:15]
8697 ; GFX90A-NEXT: ;;#ASMEND
8698 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8700 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_1:
8702 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8703 ; GFX940-NEXT: ;;#ASMSTART
8704 ; GFX940-NEXT: ; def s[8:13]
8705 ; GFX940-NEXT: ;;#ASMEND
8706 ; GFX940-NEXT: ;;#ASMSTART
8707 ; GFX940-NEXT: ; def s[0:5]
8708 ; GFX940-NEXT: ;;#ASMEND
8709 ; GFX940-NEXT: s_mov_b32 s8, s12
8710 ; GFX940-NEXT: s_mov_b32 s9, s13
8711 ; GFX940-NEXT: s_mov_b32 s10, s12
8712 ; GFX940-NEXT: s_mov_b32 s11, s13
8713 ; GFX940-NEXT: s_mov_b32 s14, s2
8714 ; GFX940-NEXT: s_mov_b32 s15, s3
8715 ; GFX940-NEXT: ;;#ASMSTART
8716 ; GFX940-NEXT: ; use s[8:15]
8717 ; GFX940-NEXT: ;;#ASMEND
8718 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8719 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8720 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8721 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 1>
8722 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffle of two <3 x i64> inline-asm ("=s") values with mask <5, 5, 5, 2>:
; result lanes 0, 1 and 2 = %vec1[2], lane 3 = %vec0[2].
; Both sources are selected, and the expected output shows two "; def" regions.
; The result is handed to inline asm pinned to s[8:15].
; Check lines are autogenerated (see the file's first line); regenerate, do not hand-edit.
8726 define void @s_shuffle_v4i64_v3i64__5_5_5_2() {
8727 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_2:
8729 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8730 ; GFX900-NEXT: ;;#ASMSTART
8731 ; GFX900-NEXT: ; def s[12:17]
8732 ; GFX900-NEXT: ;;#ASMEND
8733 ; GFX900-NEXT: ;;#ASMSTART
8734 ; GFX900-NEXT: ; def s[8:13]
8735 ; GFX900-NEXT: ;;#ASMEND
8736 ; GFX900-NEXT: s_mov_b32 s8, s12
8737 ; GFX900-NEXT: s_mov_b32 s9, s13
8738 ; GFX900-NEXT: s_mov_b32 s10, s12
8739 ; GFX900-NEXT: s_mov_b32 s11, s13
8740 ; GFX900-NEXT: s_mov_b32 s14, s16
8741 ; GFX900-NEXT: s_mov_b32 s15, s17
8742 ; GFX900-NEXT: ;;#ASMSTART
8743 ; GFX900-NEXT: ; use s[8:15]
8744 ; GFX900-NEXT: ;;#ASMEND
8745 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8747 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_2:
8749 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8750 ; GFX90A-NEXT: ;;#ASMSTART
8751 ; GFX90A-NEXT: ; def s[12:17]
8752 ; GFX90A-NEXT: ;;#ASMEND
8753 ; GFX90A-NEXT: ;;#ASMSTART
8754 ; GFX90A-NEXT: ; def s[8:13]
8755 ; GFX90A-NEXT: ;;#ASMEND
8756 ; GFX90A-NEXT: s_mov_b32 s8, s12
8757 ; GFX90A-NEXT: s_mov_b32 s9, s13
8758 ; GFX90A-NEXT: s_mov_b32 s10, s12
8759 ; GFX90A-NEXT: s_mov_b32 s11, s13
8760 ; GFX90A-NEXT: s_mov_b32 s14, s16
8761 ; GFX90A-NEXT: s_mov_b32 s15, s17
8762 ; GFX90A-NEXT: ;;#ASMSTART
8763 ; GFX90A-NEXT: ; use s[8:15]
8764 ; GFX90A-NEXT: ;;#ASMEND
8765 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8767 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_2:
8769 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8770 ; GFX940-NEXT: ;;#ASMSTART
8771 ; GFX940-NEXT: ; def s[8:13]
8772 ; GFX940-NEXT: ;;#ASMEND
8773 ; GFX940-NEXT: ;;#ASMSTART
8774 ; GFX940-NEXT: ; def s[0:5]
8775 ; GFX940-NEXT: ;;#ASMEND
8776 ; GFX940-NEXT: s_mov_b32 s8, s12
8777 ; GFX940-NEXT: s_mov_b32 s9, s13
8778 ; GFX940-NEXT: s_mov_b32 s10, s12
8779 ; GFX940-NEXT: s_mov_b32 s11, s13
8780 ; GFX940-NEXT: s_mov_b32 s14, s4
8781 ; GFX940-NEXT: s_mov_b32 s15, s5
8782 ; GFX940-NEXT: ;;#ASMSTART
8783 ; GFX940-NEXT: ; use s[8:15]
8784 ; GFX940-NEXT: ;;#ASMEND
8785 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8786 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8787 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8788 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 2>
8789 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffle of two <3 x i64> inline-asm ("=s") values with mask <5, 5, 5, 3>:
; result lanes 0, 1 and 2 = %vec1[2], lane 3 = %vec1[0].
; %vec0 is not selected by the mask, and the expected output shows one "; def".
; The result is handed to inline asm pinned to s[8:15].
; Check lines are autogenerated (see the file's first line); regenerate, do not hand-edit.
8793 define void @s_shuffle_v4i64_v3i64__5_5_5_3() {
8794 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_3:
8796 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8797 ; GFX900-NEXT: ;;#ASMSTART
8798 ; GFX900-NEXT: ; def s[16:21]
8799 ; GFX900-NEXT: ;;#ASMEND
8800 ; GFX900-NEXT: s_mov_b32 s8, s20
8801 ; GFX900-NEXT: s_mov_b32 s9, s21
8802 ; GFX900-NEXT: s_mov_b32 s10, s20
8803 ; GFX900-NEXT: s_mov_b32 s11, s21
8804 ; GFX900-NEXT: s_mov_b32 s12, s20
8805 ; GFX900-NEXT: s_mov_b32 s13, s21
8806 ; GFX900-NEXT: s_mov_b32 s14, s16
8807 ; GFX900-NEXT: s_mov_b32 s15, s17
8808 ; GFX900-NEXT: ;;#ASMSTART
8809 ; GFX900-NEXT: ; use s[8:15]
8810 ; GFX900-NEXT: ;;#ASMEND
8811 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8813 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_3:
8815 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8816 ; GFX90A-NEXT: ;;#ASMSTART
8817 ; GFX90A-NEXT: ; def s[16:21]
8818 ; GFX90A-NEXT: ;;#ASMEND
8819 ; GFX90A-NEXT: s_mov_b32 s8, s20
8820 ; GFX90A-NEXT: s_mov_b32 s9, s21
8821 ; GFX90A-NEXT: s_mov_b32 s10, s20
8822 ; GFX90A-NEXT: s_mov_b32 s11, s21
8823 ; GFX90A-NEXT: s_mov_b32 s12, s20
8824 ; GFX90A-NEXT: s_mov_b32 s13, s21
8825 ; GFX90A-NEXT: s_mov_b32 s14, s16
8826 ; GFX90A-NEXT: s_mov_b32 s15, s17
8827 ; GFX90A-NEXT: ;;#ASMSTART
8828 ; GFX90A-NEXT: ; use s[8:15]
8829 ; GFX90A-NEXT: ;;#ASMEND
8830 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8832 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_3:
8834 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8835 ; GFX940-NEXT: ;;#ASMSTART
8836 ; GFX940-NEXT: ; def s[0:5]
8837 ; GFX940-NEXT: ;;#ASMEND
8838 ; GFX940-NEXT: s_mov_b32 s8, s4
8839 ; GFX940-NEXT: s_mov_b32 s9, s5
8840 ; GFX940-NEXT: s_mov_b32 s10, s4
8841 ; GFX940-NEXT: s_mov_b32 s11, s5
8842 ; GFX940-NEXT: s_mov_b32 s12, s4
8843 ; GFX940-NEXT: s_mov_b32 s13, s5
8844 ; GFX940-NEXT: s_mov_b32 s14, s0
8845 ; GFX940-NEXT: s_mov_b32 s15, s1
8846 ; GFX940-NEXT: ;;#ASMSTART
8847 ; GFX940-NEXT: ; use s[8:15]
8848 ; GFX940-NEXT: ;;#ASMEND
8849 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8850 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8851 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8852 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 3>
8853 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffle of two <3 x i64> inline-asm ("=s") values with mask <5, 5, 5, 4>:
; result lanes 0, 1 and 2 = %vec1[2], lane 3 = %vec1[1].
; %vec0 is not selected by the mask, and the expected output shows one "; def".
; The result is handed to inline asm pinned to s[8:15].
; Check lines are autogenerated (see the file's first line); regenerate, do not hand-edit.
8857 define void @s_shuffle_v4i64_v3i64__5_5_5_4() {
8858 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_4:
8860 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8861 ; GFX900-NEXT: ;;#ASMSTART
8862 ; GFX900-NEXT: ; def s[12:17]
8863 ; GFX900-NEXT: ;;#ASMEND
8864 ; GFX900-NEXT: s_mov_b32 s8, s16
8865 ; GFX900-NEXT: s_mov_b32 s9, s17
8866 ; GFX900-NEXT: s_mov_b32 s10, s16
8867 ; GFX900-NEXT: s_mov_b32 s11, s17
8868 ; GFX900-NEXT: s_mov_b32 s12, s16
8869 ; GFX900-NEXT: s_mov_b32 s13, s17
8870 ; GFX900-NEXT: ;;#ASMSTART
8871 ; GFX900-NEXT: ; use s[8:15]
8872 ; GFX900-NEXT: ;;#ASMEND
8873 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8875 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_4:
8877 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8878 ; GFX90A-NEXT: ;;#ASMSTART
8879 ; GFX90A-NEXT: ; def s[12:17]
8880 ; GFX90A-NEXT: ;;#ASMEND
8881 ; GFX90A-NEXT: s_mov_b32 s8, s16
8882 ; GFX90A-NEXT: s_mov_b32 s9, s17
8883 ; GFX90A-NEXT: s_mov_b32 s10, s16
8884 ; GFX90A-NEXT: s_mov_b32 s11, s17
8885 ; GFX90A-NEXT: s_mov_b32 s12, s16
8886 ; GFX90A-NEXT: s_mov_b32 s13, s17
8887 ; GFX90A-NEXT: ;;#ASMSTART
8888 ; GFX90A-NEXT: ; use s[8:15]
8889 ; GFX90A-NEXT: ;;#ASMEND
8890 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8892 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_4:
8894 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8895 ; GFX940-NEXT: ;;#ASMSTART
8896 ; GFX940-NEXT: ; def s[0:5]
8897 ; GFX940-NEXT: ;;#ASMEND
8898 ; GFX940-NEXT: s_mov_b32 s8, s4
8899 ; GFX940-NEXT: s_mov_b32 s9, s5
8900 ; GFX940-NEXT: s_mov_b32 s10, s4
8901 ; GFX940-NEXT: s_mov_b32 s11, s5
8902 ; GFX940-NEXT: s_mov_b32 s12, s4
8903 ; GFX940-NEXT: s_mov_b32 s13, s5
8904 ; GFX940-NEXT: s_mov_b32 s14, s2
8905 ; GFX940-NEXT: s_mov_b32 s15, s3
8906 ; GFX940-NEXT: ;;#ASMSTART
8907 ; GFX940-NEXT: ; use s[8:15]
8908 ; GFX940-NEXT: ;;#ASMEND
8909 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8910 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8911 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8912 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 4>
8913 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffle of two <3 x i64> inline-asm ("=s") values with mask <5, 5, 5, 5>:
; every result lane = %vec1[2] (a splat of the last element of the second operand).
; %vec0 is not selected by the mask, and the expected output shows one "; def".
; The same expected sequence is shared by every llc target this file exercises.
; Check lines are autogenerated (see the file's first line); regenerate, do not hand-edit.
8917 define void @s_shuffle_v4i64_v3i64__5_5_5_5() {
8918 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_5_5:
8920 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8921 ; GFX9-NEXT: ;;#ASMSTART
8922 ; GFX9-NEXT: ; def s[8:13]
8923 ; GFX9-NEXT: ;;#ASMEND
8924 ; GFX9-NEXT: s_mov_b32 s8, s12
8925 ; GFX9-NEXT: s_mov_b32 s9, s13
8926 ; GFX9-NEXT: s_mov_b32 s10, s12
8927 ; GFX9-NEXT: s_mov_b32 s11, s13
8928 ; GFX9-NEXT: s_mov_b32 s14, s12
8929 ; GFX9-NEXT: s_mov_b32 s15, s13
8930 ; GFX9-NEXT: ;;#ASMSTART
8931 ; GFX9-NEXT: ; use s[8:15]
8932 ; GFX9-NEXT: ;;#ASMEND
8933 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8934 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8935 %vec1 = call <3 x i64> asm "; def $0", "=s"()
8936 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
8937 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffle of one <3 x i64> inline-asm ("=s") value (second operand is poison)
; with mask <poison, 0, 0, 0>: result lane 0 is poison, lanes 1-3 = %vec0[0].
; The result is handed to inline asm pinned to s[8:15].
; Check lines are autogenerated (see the file's first line); regenerate, do not hand-edit.
8941 define void @s_shuffle_v4i64_v3i64__u_0_0_0() {
8942 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0:
8944 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8945 ; GFX900-NEXT: ;;#ASMSTART
8946 ; GFX900-NEXT: ; def s[4:9]
8947 ; GFX900-NEXT: ;;#ASMEND
8948 ; GFX900-NEXT: s_mov_b32 s10, s4
8949 ; GFX900-NEXT: s_mov_b32 s11, s5
8950 ; GFX900-NEXT: s_mov_b32 s12, s4
8951 ; GFX900-NEXT: s_mov_b32 s13, s5
8952 ; GFX900-NEXT: s_mov_b32 s14, s4
8953 ; GFX900-NEXT: s_mov_b32 s15, s5
8954 ; GFX900-NEXT: ;;#ASMSTART
8955 ; GFX900-NEXT: ; use s[8:15]
8956 ; GFX900-NEXT: ;;#ASMEND
8957 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8959 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0:
8961 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8962 ; GFX90A-NEXT: ;;#ASMSTART
8963 ; GFX90A-NEXT: ; def s[4:9]
8964 ; GFX90A-NEXT: ;;#ASMEND
8965 ; GFX90A-NEXT: s_mov_b32 s10, s4
8966 ; GFX90A-NEXT: s_mov_b32 s11, s5
8967 ; GFX90A-NEXT: s_mov_b32 s12, s4
8968 ; GFX90A-NEXT: s_mov_b32 s13, s5
8969 ; GFX90A-NEXT: s_mov_b32 s14, s4
8970 ; GFX90A-NEXT: s_mov_b32 s15, s5
8971 ; GFX90A-NEXT: ;;#ASMSTART
8972 ; GFX90A-NEXT: ; use s[8:15]
8973 ; GFX90A-NEXT: ;;#ASMEND
8974 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8976 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0:
8978 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8979 ; GFX940-NEXT: ;;#ASMSTART
8980 ; GFX940-NEXT: ; def s[0:5]
8981 ; GFX940-NEXT: ;;#ASMEND
8982 ; GFX940-NEXT: s_mov_b32 s10, s0
8983 ; GFX940-NEXT: s_mov_b32 s11, s1
8984 ; GFX940-NEXT: s_mov_b32 s12, s0
8985 ; GFX940-NEXT: s_mov_b32 s13, s1
8986 ; GFX940-NEXT: s_mov_b32 s14, s0
8987 ; GFX940-NEXT: s_mov_b32 s15, s1
8988 ; GFX940-NEXT: ;;#ASMSTART
8989 ; GFX940-NEXT: ; use s[8:15]
8990 ; GFX940-NEXT: ;;#ASMEND
8991 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8992 %vec0 = call <3 x i64> asm "; def $0", "=s"()
8993 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
8994 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffle of one <3 x i64> inline-asm ("=s") value (second operand is poison)
; with an all-zero mask: every result lane = %vec0[0] (a splat of element 0).
; The same expected sequence is shared by every llc target this file exercises.
; Check lines are autogenerated (see the file's first line); regenerate, do not hand-edit.
8998 define void @s_shuffle_v4i64_v3i64__0_0_0_0() {
8999 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_0_0_0:
9001 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9002 ; GFX9-NEXT: ;;#ASMSTART
9003 ; GFX9-NEXT: ; def s[8:13]
9004 ; GFX9-NEXT: ;;#ASMEND
9005 ; GFX9-NEXT: s_mov_b32 s10, s8
9006 ; GFX9-NEXT: s_mov_b32 s11, s9
9007 ; GFX9-NEXT: s_mov_b32 s12, s8
9008 ; GFX9-NEXT: s_mov_b32 s13, s9
9009 ; GFX9-NEXT: s_mov_b32 s14, s8
9010 ; GFX9-NEXT: s_mov_b32 s15, s9
9011 ; GFX9-NEXT: ;;#ASMSTART
9012 ; GFX9-NEXT: ; use s[8:15]
9013 ; GFX9-NEXT: ;;#ASMEND
9014 ; GFX9-NEXT: s_setpc_b64 s[30:31]
9015 %vec0 = call <3 x i64> asm "; def $0", "=s"()
9016 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> zeroinitializer
9017 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffle of one <3 x i64> inline-asm ("=s") value (second operand is poison)
; with mask <1, 0, 0, 0>: result lane 0 = %vec0[1], lanes 1-3 = %vec0[0].
; The result is handed to inline asm pinned to s[8:15].
; Check lines are autogenerated (see the file's first line); regenerate, do not hand-edit.
9021 define void @s_shuffle_v4i64_v3i64__1_0_0_0() {
9022 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_0_0_0:
9024 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9025 ; GFX900-NEXT: ;;#ASMSTART
9026 ; GFX900-NEXT: ; def s[4:9]
9027 ; GFX900-NEXT: ;;#ASMEND
9028 ; GFX900-NEXT: s_mov_b32 s8, s6
9029 ; GFX900-NEXT: s_mov_b32 s9, s7
9030 ; GFX900-NEXT: s_mov_b32 s10, s4
9031 ; GFX900-NEXT: s_mov_b32 s11, s5
9032 ; GFX900-NEXT: s_mov_b32 s12, s4
9033 ; GFX900-NEXT: s_mov_b32 s13, s5
9034 ; GFX900-NEXT: s_mov_b32 s14, s4
9035 ; GFX900-NEXT: s_mov_b32 s15, s5
9036 ; GFX900-NEXT: ;;#ASMSTART
9037 ; GFX900-NEXT: ; use s[8:15]
9038 ; GFX900-NEXT: ;;#ASMEND
9039 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9041 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_0_0_0:
9043 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9044 ; GFX90A-NEXT: ;;#ASMSTART
9045 ; GFX90A-NEXT: ; def s[4:9]
9046 ; GFX90A-NEXT: ;;#ASMEND
9047 ; GFX90A-NEXT: s_mov_b32 s8, s6
9048 ; GFX90A-NEXT: s_mov_b32 s9, s7
9049 ; GFX90A-NEXT: s_mov_b32 s10, s4
9050 ; GFX90A-NEXT: s_mov_b32 s11, s5
9051 ; GFX90A-NEXT: s_mov_b32 s12, s4
9052 ; GFX90A-NEXT: s_mov_b32 s13, s5
9053 ; GFX90A-NEXT: s_mov_b32 s14, s4
9054 ; GFX90A-NEXT: s_mov_b32 s15, s5
9055 ; GFX90A-NEXT: ;;#ASMSTART
9056 ; GFX90A-NEXT: ; use s[8:15]
9057 ; GFX90A-NEXT: ;;#ASMEND
9058 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9060 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_0_0_0:
9062 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9063 ; GFX940-NEXT: ;;#ASMSTART
9064 ; GFX940-NEXT: ; def s[0:5]
9065 ; GFX940-NEXT: ;;#ASMEND
9066 ; GFX940-NEXT: s_mov_b32 s8, s2
9067 ; GFX940-NEXT: s_mov_b32 s9, s3
9068 ; GFX940-NEXT: s_mov_b32 s10, s0
9069 ; GFX940-NEXT: s_mov_b32 s11, s1
9070 ; GFX940-NEXT: s_mov_b32 s12, s0
9071 ; GFX940-NEXT: s_mov_b32 s13, s1
9072 ; GFX940-NEXT: s_mov_b32 s14, s0
9073 ; GFX940-NEXT: s_mov_b32 s15, s1
9074 ; GFX940-NEXT: ;;#ASMSTART
9075 ; GFX940-NEXT: ; use s[8:15]
9076 ; GFX940-NEXT: ;;#ASMEND
9077 ; GFX940-NEXT: s_setpc_b64 s[30:31]
9078 %vec0 = call <3 x i64> asm "; def $0", "=s"()
9079 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
9080 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR case, shuffle mask <2, 0, 0, 0> over a single asm-defined <3 x i64>
; (the second shuffle operand is poison). Result lane 0 is source element 2 and
; lanes 1-3 repeat source element 0; the inline-asm use pins the result to
; s[8:15]. In the gfx900 and gfx90a expectations the def is s[4:9], so element 2
; already sits in s[8:9] and only six s_mov_b32 are expected; the gfx940
; expectations define s[0:5] and copy all eight dwords.
9084 define void @s_shuffle_v4i64_v3i64__2_0_0_0() {
9085 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_0_0_0:
9087 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9088 ; GFX900-NEXT: ;;#ASMSTART
9089 ; GFX900-NEXT: ; def s[4:9]
9090 ; GFX900-NEXT: ;;#ASMEND
9091 ; GFX900-NEXT: s_mov_b32 s10, s4
9092 ; GFX900-NEXT: s_mov_b32 s11, s5
9093 ; GFX900-NEXT: s_mov_b32 s12, s4
9094 ; GFX900-NEXT: s_mov_b32 s13, s5
9095 ; GFX900-NEXT: s_mov_b32 s14, s4
9096 ; GFX900-NEXT: s_mov_b32 s15, s5
9097 ; GFX900-NEXT: ;;#ASMSTART
9098 ; GFX900-NEXT: ; use s[8:15]
9099 ; GFX900-NEXT: ;;#ASMEND
9100 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9102 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_0_0_0:
9104 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9105 ; GFX90A-NEXT: ;;#ASMSTART
9106 ; GFX90A-NEXT: ; def s[4:9]
9107 ; GFX90A-NEXT: ;;#ASMEND
9108 ; GFX90A-NEXT: s_mov_b32 s10, s4
9109 ; GFX90A-NEXT: s_mov_b32 s11, s5
9110 ; GFX90A-NEXT: s_mov_b32 s12, s4
9111 ; GFX90A-NEXT: s_mov_b32 s13, s5
9112 ; GFX90A-NEXT: s_mov_b32 s14, s4
9113 ; GFX90A-NEXT: s_mov_b32 s15, s5
9114 ; GFX90A-NEXT: ;;#ASMSTART
9115 ; GFX90A-NEXT: ; use s[8:15]
9116 ; GFX90A-NEXT: ;;#ASMEND
9117 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9119 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_0_0_0:
9121 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9122 ; GFX940-NEXT: ;;#ASMSTART
9123 ; GFX940-NEXT: ; def s[0:5]
9124 ; GFX940-NEXT: ;;#ASMEND
9125 ; GFX940-NEXT: s_mov_b32 s8, s4
9126 ; GFX940-NEXT: s_mov_b32 s9, s5
9127 ; GFX940-NEXT: s_mov_b32 s10, s0
9128 ; GFX940-NEXT: s_mov_b32 s11, s1
9129 ; GFX940-NEXT: s_mov_b32 s12, s0
9130 ; GFX940-NEXT: s_mov_b32 s13, s1
9131 ; GFX940-NEXT: s_mov_b32 s14, s0
9132 ; GFX940-NEXT: s_mov_b32 s15, s1
9133 ; GFX940-NEXT: ;;#ASMSTART
9134 ; GFX940-NEXT: ; use s[8:15]
9135 ; GFX940-NEXT: ;;#ASMEND
9136 ; GFX940-NEXT: s_setpc_b64 s[30:31]
9137 %vec0 = call <3 x i64> asm "; def $0", "=s"()
9138 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
9139 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR case, shuffle mask <3, 0, 0, 0> with a poison second operand. Mask index
; 3 selects element 0 of the second (poison) operand, so result lane 0 carries
; no defined value. Accordingly none of the expected sequences writes s8 or s9;
; only lanes 1-3 (s[10:15]) are filled from element 0 of the asm-defined vector.
9143 define void @s_shuffle_v4i64_v3i64__3_0_0_0() {
9144 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0:
9146 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9147 ; GFX900-NEXT: ;;#ASMSTART
9148 ; GFX900-NEXT: ; def s[4:9]
9149 ; GFX900-NEXT: ;;#ASMEND
9150 ; GFX900-NEXT: s_mov_b32 s10, s4
9151 ; GFX900-NEXT: s_mov_b32 s11, s5
9152 ; GFX900-NEXT: s_mov_b32 s12, s4
9153 ; GFX900-NEXT: s_mov_b32 s13, s5
9154 ; GFX900-NEXT: s_mov_b32 s14, s4
9155 ; GFX900-NEXT: s_mov_b32 s15, s5
9156 ; GFX900-NEXT: ;;#ASMSTART
9157 ; GFX900-NEXT: ; use s[8:15]
9158 ; GFX900-NEXT: ;;#ASMEND
9159 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9161 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0:
9163 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9164 ; GFX90A-NEXT: ;;#ASMSTART
9165 ; GFX90A-NEXT: ; def s[4:9]
9166 ; GFX90A-NEXT: ;;#ASMEND
9167 ; GFX90A-NEXT: s_mov_b32 s10, s4
9168 ; GFX90A-NEXT: s_mov_b32 s11, s5
9169 ; GFX90A-NEXT: s_mov_b32 s12, s4
9170 ; GFX90A-NEXT: s_mov_b32 s13, s5
9171 ; GFX90A-NEXT: s_mov_b32 s14, s4
9172 ; GFX90A-NEXT: s_mov_b32 s15, s5
9173 ; GFX90A-NEXT: ;;#ASMSTART
9174 ; GFX90A-NEXT: ; use s[8:15]
9175 ; GFX90A-NEXT: ;;#ASMEND
9176 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9178 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0:
9180 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9181 ; GFX940-NEXT: ;;#ASMSTART
9182 ; GFX940-NEXT: ; def s[0:5]
9183 ; GFX940-NEXT: ;;#ASMEND
9184 ; GFX940-NEXT: s_mov_b32 s10, s0
9185 ; GFX940-NEXT: s_mov_b32 s11, s1
9186 ; GFX940-NEXT: s_mov_b32 s12, s0
9187 ; GFX940-NEXT: s_mov_b32 s13, s1
9188 ; GFX940-NEXT: s_mov_b32 s14, s0
9189 ; GFX940-NEXT: s_mov_b32 s15, s1
9190 ; GFX940-NEXT: ;;#ASMSTART
9191 ; GFX940-NEXT: ; use s[8:15]
9192 ; GFX940-NEXT: ;;#ASMEND
9193 ; GFX940-NEXT: s_setpc_b64 s[30:31]
9194 %vec0 = call <3 x i64> asm "; def $0", "=s"()
9195 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
9196 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR case with two asm-defined <3 x i64> operands, shuffle mask <4, 0, 0, 0>:
; result lane 0 is element 1 of %vec1 and lanes 1-3 repeat element 0 of %vec0.
; The result is pinned to s[8:15] by the inline-asm use.
; NOTE: the two "; def" asm strings are identical, so the expectations cannot
; tell which emitted def belongs to which IR value. The expected register ranges
; of the two defs overlap; the overlapped dwords are never read by the copies.
9200 define void @s_shuffle_v4i64_v3i64__4_0_0_0() {
9201 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_0_0_0:
9203 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9204 ; GFX900-NEXT: ;;#ASMSTART
9205 ; GFX900-NEXT: ; def s[4:9]
9206 ; GFX900-NEXT: ;;#ASMEND
9207 ; GFX900-NEXT: ;;#ASMSTART
9208 ; GFX900-NEXT: ; def s[8:13]
9209 ; GFX900-NEXT: ;;#ASMEND
9210 ; GFX900-NEXT: s_mov_b32 s8, s10
9211 ; GFX900-NEXT: s_mov_b32 s9, s11
9212 ; GFX900-NEXT: s_mov_b32 s10, s4
9213 ; GFX900-NEXT: s_mov_b32 s11, s5
9214 ; GFX900-NEXT: s_mov_b32 s12, s4
9215 ; GFX900-NEXT: s_mov_b32 s13, s5
9216 ; GFX900-NEXT: s_mov_b32 s14, s4
9217 ; GFX900-NEXT: s_mov_b32 s15, s5
9218 ; GFX900-NEXT: ;;#ASMSTART
9219 ; GFX900-NEXT: ; use s[8:15]
9220 ; GFX900-NEXT: ;;#ASMEND
9221 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9223 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_0_0_0:
9225 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9226 ; GFX90A-NEXT: ;;#ASMSTART
9227 ; GFX90A-NEXT: ; def s[4:9]
9228 ; GFX90A-NEXT: ;;#ASMEND
9229 ; GFX90A-NEXT: ;;#ASMSTART
9230 ; GFX90A-NEXT: ; def s[8:13]
9231 ; GFX90A-NEXT: ;;#ASMEND
9232 ; GFX90A-NEXT: s_mov_b32 s8, s10
9233 ; GFX90A-NEXT: s_mov_b32 s9, s11
9234 ; GFX90A-NEXT: s_mov_b32 s10, s4
9235 ; GFX90A-NEXT: s_mov_b32 s11, s5
9236 ; GFX90A-NEXT: s_mov_b32 s12, s4
9237 ; GFX90A-NEXT: s_mov_b32 s13, s5
9238 ; GFX90A-NEXT: s_mov_b32 s14, s4
9239 ; GFX90A-NEXT: s_mov_b32 s15, s5
9240 ; GFX90A-NEXT: ;;#ASMSTART
9241 ; GFX90A-NEXT: ; use s[8:15]
9242 ; GFX90A-NEXT: ;;#ASMEND
9243 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9245 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_0_0_0:
9247 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9248 ; GFX940-NEXT: ;;#ASMSTART
9249 ; GFX940-NEXT: ; def s[0:5]
9250 ; GFX940-NEXT: ;;#ASMEND
9251 ; GFX940-NEXT: s_mov_b32 s10, s0
9252 ; GFX940-NEXT: ;;#ASMSTART
9253 ; GFX940-NEXT: ; def s[4:9]
9254 ; GFX940-NEXT: ;;#ASMEND
9255 ; GFX940-NEXT: s_mov_b32 s8, s6
9256 ; GFX940-NEXT: s_mov_b32 s9, s7
9257 ; GFX940-NEXT: s_mov_b32 s11, s1
9258 ; GFX940-NEXT: s_mov_b32 s12, s0
9259 ; GFX940-NEXT: s_mov_b32 s13, s1
9260 ; GFX940-NEXT: s_mov_b32 s14, s0
9261 ; GFX940-NEXT: s_mov_b32 s15, s1
9262 ; GFX940-NEXT: ;;#ASMSTART
9263 ; GFX940-NEXT: ; use s[8:15]
9264 ; GFX940-NEXT: ;;#ASMEND
9265 ; GFX940-NEXT: s_setpc_b64 s[30:31]
9266 %vec0 = call <3 x i64> asm "; def $0", "=s"()
9267 %vec1 = call <3 x i64> asm "; def $0", "=s"()
9268 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
9269 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR case with two asm-defined <3 x i64> operands, shuffle mask <5, 0, 0, 0>:
; result lane 0 is element 2 of %vec1 and lanes 1-3 repeat element 0 of %vec0.
; The gfx900 and gfx90a expectations copy all eight dwords of s[8:15]. In the
; gfx940 expectations the second def is s[4:9], whose element 2 already occupies
; s[8:9], so only s[10:15] are copied (the s10 copy is expected between the two
; defs).
9273 define void @s_shuffle_v4i64_v3i64__5_0_0_0() {
9274 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_0_0:
9276 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9277 ; GFX900-NEXT: ;;#ASMSTART
9278 ; GFX900-NEXT: ; def s[4:9]
9279 ; GFX900-NEXT: ;;#ASMEND
9280 ; GFX900-NEXT: ;;#ASMSTART
9281 ; GFX900-NEXT: ; def s[8:13]
9282 ; GFX900-NEXT: ;;#ASMEND
9283 ; GFX900-NEXT: s_mov_b32 s8, s12
9284 ; GFX900-NEXT: s_mov_b32 s9, s13
9285 ; GFX900-NEXT: s_mov_b32 s10, s4
9286 ; GFX900-NEXT: s_mov_b32 s11, s5
9287 ; GFX900-NEXT: s_mov_b32 s12, s4
9288 ; GFX900-NEXT: s_mov_b32 s13, s5
9289 ; GFX900-NEXT: s_mov_b32 s14, s4
9290 ; GFX900-NEXT: s_mov_b32 s15, s5
9291 ; GFX900-NEXT: ;;#ASMSTART
9292 ; GFX900-NEXT: ; use s[8:15]
9293 ; GFX900-NEXT: ;;#ASMEND
9294 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9296 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_0_0:
9298 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9299 ; GFX90A-NEXT: ;;#ASMSTART
9300 ; GFX90A-NEXT: ; def s[4:9]
9301 ; GFX90A-NEXT: ;;#ASMEND
9302 ; GFX90A-NEXT: ;;#ASMSTART
9303 ; GFX90A-NEXT: ; def s[8:13]
9304 ; GFX90A-NEXT: ;;#ASMEND
9305 ; GFX90A-NEXT: s_mov_b32 s8, s12
9306 ; GFX90A-NEXT: s_mov_b32 s9, s13
9307 ; GFX90A-NEXT: s_mov_b32 s10, s4
9308 ; GFX90A-NEXT: s_mov_b32 s11, s5
9309 ; GFX90A-NEXT: s_mov_b32 s12, s4
9310 ; GFX90A-NEXT: s_mov_b32 s13, s5
9311 ; GFX90A-NEXT: s_mov_b32 s14, s4
9312 ; GFX90A-NEXT: s_mov_b32 s15, s5
9313 ; GFX90A-NEXT: ;;#ASMSTART
9314 ; GFX90A-NEXT: ; use s[8:15]
9315 ; GFX90A-NEXT: ;;#ASMEND
9316 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9318 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_0_0:
9320 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9321 ; GFX940-NEXT: ;;#ASMSTART
9322 ; GFX940-NEXT: ; def s[0:5]
9323 ; GFX940-NEXT: ;;#ASMEND
9324 ; GFX940-NEXT: s_mov_b32 s10, s0
9325 ; GFX940-NEXT: ;;#ASMSTART
9326 ; GFX940-NEXT: ; def s[4:9]
9327 ; GFX940-NEXT: ;;#ASMEND
9328 ; GFX940-NEXT: s_mov_b32 s11, s1
9329 ; GFX940-NEXT: s_mov_b32 s12, s0
9330 ; GFX940-NEXT: s_mov_b32 s13, s1
9331 ; GFX940-NEXT: s_mov_b32 s14, s0
9332 ; GFX940-NEXT: s_mov_b32 s15, s1
9333 ; GFX940-NEXT: ;;#ASMSTART
9334 ; GFX940-NEXT: ; use s[8:15]
9335 ; GFX940-NEXT: ;;#ASMEND
9336 ; GFX940-NEXT: s_setpc_b64 s[30:31]
9337 %vec0 = call <3 x i64> asm "; def $0", "=s"()
9338 %vec1 = call <3 x i64> asm "; def $0", "=s"()
9339 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
9340 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR case with two asm-defined <3 x i64> operands, shuffle mask
; <5, poison, 0, 0>: result lane 0 is element 2 of %vec1, lane 1 is poison, and
; lanes 2-3 repeat element 0 of %vec0. Because lane 1 is poison, none of the
; expected sequences contains an s_mov_b32 into s10 or s11.
9344 define void @s_shuffle_v4i64_v3i64__5_u_0_0() {
9345 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_0_0:
9347 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9348 ; GFX900-NEXT: ;;#ASMSTART
9349 ; GFX900-NEXT: ; def s[4:9]
9350 ; GFX900-NEXT: ;;#ASMEND
9351 ; GFX900-NEXT: ;;#ASMSTART
9352 ; GFX900-NEXT: ; def s[8:13]
9353 ; GFX900-NEXT: ;;#ASMEND
9354 ; GFX900-NEXT: s_mov_b32 s8, s12
9355 ; GFX900-NEXT: s_mov_b32 s9, s13
9356 ; GFX900-NEXT: s_mov_b32 s12, s4
9357 ; GFX900-NEXT: s_mov_b32 s13, s5
9358 ; GFX900-NEXT: s_mov_b32 s14, s4
9359 ; GFX900-NEXT: s_mov_b32 s15, s5
9360 ; GFX900-NEXT: ;;#ASMSTART
9361 ; GFX900-NEXT: ; use s[8:15]
9362 ; GFX900-NEXT: ;;#ASMEND
9363 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9365 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_0_0:
9367 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9368 ; GFX90A-NEXT: ;;#ASMSTART
9369 ; GFX90A-NEXT: ; def s[4:9]
9370 ; GFX90A-NEXT: ;;#ASMEND
9371 ; GFX90A-NEXT: ;;#ASMSTART
9372 ; GFX90A-NEXT: ; def s[8:13]
9373 ; GFX90A-NEXT: ;;#ASMEND
9374 ; GFX90A-NEXT: s_mov_b32 s8, s12
9375 ; GFX90A-NEXT: s_mov_b32 s9, s13
9376 ; GFX90A-NEXT: s_mov_b32 s12, s4
9377 ; GFX90A-NEXT: s_mov_b32 s13, s5
9378 ; GFX90A-NEXT: s_mov_b32 s14, s4
9379 ; GFX90A-NEXT: s_mov_b32 s15, s5
9380 ; GFX90A-NEXT: ;;#ASMSTART
9381 ; GFX90A-NEXT: ; use s[8:15]
9382 ; GFX90A-NEXT: ;;#ASMEND
9383 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9385 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_0_0:
9387 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9388 ; GFX940-NEXT: ;;#ASMSTART
9389 ; GFX940-NEXT: ; def s[0:5]
9390 ; GFX940-NEXT: ;;#ASMEND
9391 ; GFX940-NEXT: s_mov_b32 s12, s0
9392 ; GFX940-NEXT: ;;#ASMSTART
9393 ; GFX940-NEXT: ; def s[4:9]
9394 ; GFX940-NEXT: ;;#ASMEND
9395 ; GFX940-NEXT: s_mov_b32 s13, s1
9396 ; GFX940-NEXT: s_mov_b32 s14, s0
9397 ; GFX940-NEXT: s_mov_b32 s15, s1
9398 ; GFX940-NEXT: ;;#ASMSTART
9399 ; GFX940-NEXT: ; use s[8:15]
9400 ; GFX940-NEXT: ;;#ASMEND
9401 ; GFX940-NEXT: s_setpc_b64 s[30:31]
9402 %vec0 = call <3 x i64> asm "; def $0", "=s"()
9403 %vec1 = call <3 x i64> asm "; def $0", "=s"()
9404 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 0, i32 0>
9405 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR case with two asm-defined <3 x i64> operands, shuffle mask <5, 1, 0, 0>:
; result lane 0 is element 2 of %vec1, lane 1 is element 1 of %vec0, and lanes
; 2-3 repeat element 0 of %vec0. The result is pinned to s[8:15] by the
; inline-asm use. In the gfx940 expectations the second def is s[4:9], so lane 0
; needs no copy.
9409 define void @s_shuffle_v4i64_v3i64__5_1_0_0() {
9410 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_0_0:
9412 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9413 ; GFX900-NEXT: ;;#ASMSTART
9414 ; GFX900-NEXT: ; def s[4:9]
9415 ; GFX900-NEXT: ;;#ASMEND
9416 ; GFX900-NEXT: ;;#ASMSTART
9417 ; GFX900-NEXT: ; def s[8:13]
9418 ; GFX900-NEXT: ;;#ASMEND
9419 ; GFX900-NEXT: s_mov_b32 s8, s12
9420 ; GFX900-NEXT: s_mov_b32 s9, s13
9421 ; GFX900-NEXT: s_mov_b32 s10, s6
9422 ; GFX900-NEXT: s_mov_b32 s11, s7
9423 ; GFX900-NEXT: s_mov_b32 s12, s4
9424 ; GFX900-NEXT: s_mov_b32 s13, s5
9425 ; GFX900-NEXT: s_mov_b32 s14, s4
9426 ; GFX900-NEXT: s_mov_b32 s15, s5
9427 ; GFX900-NEXT: ;;#ASMSTART
9428 ; GFX900-NEXT: ; use s[8:15]
9429 ; GFX900-NEXT: ;;#ASMEND
9430 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9432 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_0_0:
9434 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9435 ; GFX90A-NEXT: ;;#ASMSTART
9436 ; GFX90A-NEXT: ; def s[4:9]
9437 ; GFX90A-NEXT: ;;#ASMEND
9438 ; GFX90A-NEXT: ;;#ASMSTART
9439 ; GFX90A-NEXT: ; def s[8:13]
9440 ; GFX90A-NEXT: ;;#ASMEND
9441 ; GFX90A-NEXT: s_mov_b32 s8, s12
9442 ; GFX90A-NEXT: s_mov_b32 s9, s13
9443 ; GFX90A-NEXT: s_mov_b32 s10, s6
9444 ; GFX90A-NEXT: s_mov_b32 s11, s7
9445 ; GFX90A-NEXT: s_mov_b32 s12, s4
9446 ; GFX90A-NEXT: s_mov_b32 s13, s5
9447 ; GFX90A-NEXT: s_mov_b32 s14, s4
9448 ; GFX90A-NEXT: s_mov_b32 s15, s5
9449 ; GFX90A-NEXT: ;;#ASMSTART
9450 ; GFX90A-NEXT: ; use s[8:15]
9451 ; GFX90A-NEXT: ;;#ASMEND
9452 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9454 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_0_0:
9456 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9457 ; GFX940-NEXT: ;;#ASMSTART
9458 ; GFX940-NEXT: ; def s[0:5]
9459 ; GFX940-NEXT: ;;#ASMEND
9460 ; GFX940-NEXT: s_mov_b32 s10, s2
9461 ; GFX940-NEXT: ;;#ASMSTART
9462 ; GFX940-NEXT: ; def s[4:9]
9463 ; GFX940-NEXT: ;;#ASMEND
9464 ; GFX940-NEXT: s_mov_b32 s11, s3
9465 ; GFX940-NEXT: s_mov_b32 s12, s0
9466 ; GFX940-NEXT: s_mov_b32 s13, s1
9467 ; GFX940-NEXT: s_mov_b32 s14, s0
9468 ; GFX940-NEXT: s_mov_b32 s15, s1
9469 ; GFX940-NEXT: ;;#ASMSTART
9470 ; GFX940-NEXT: ; use s[8:15]
9471 ; GFX940-NEXT: ;;#ASMEND
9472 ; GFX940-NEXT: s_setpc_b64 s[30:31]
9473 %vec0 = call <3 x i64> asm "; def $0", "=s"()
9474 %vec1 = call <3 x i64> asm "; def $0", "=s"()
9475 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 0, i32 0>
9476 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR case with two asm-defined <3 x i64> operands, shuffle mask <5, 2, 0, 0>:
; result lane 0 is element 2 of %vec1, lane 1 is element 2 of %vec0, and lanes
; 2-3 repeat element 0 of %vec0. In the gfx900 and gfx90a expectations one def
; is s[4:9], leaving its element 2 in s[8:9], so six copies are expected; the
; gfx940 expectations copy all eight dwords.
; NOTE: the two "; def" asm strings are identical, so the expectations cannot
; tell which emitted def belongs to which IR value.
9480 define void @s_shuffle_v4i64_v3i64__5_2_0_0() {
9481 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_0_0:
9483 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9484 ; GFX900-NEXT: ;;#ASMSTART
9485 ; GFX900-NEXT: ; def s[16:21]
9486 ; GFX900-NEXT: ;;#ASMEND
9487 ; GFX900-NEXT: ;;#ASMSTART
9488 ; GFX900-NEXT: ; def s[4:9]
9489 ; GFX900-NEXT: ;;#ASMEND
9490 ; GFX900-NEXT: s_mov_b32 s10, s20
9491 ; GFX900-NEXT: s_mov_b32 s11, s21
9492 ; GFX900-NEXT: s_mov_b32 s12, s16
9493 ; GFX900-NEXT: s_mov_b32 s13, s17
9494 ; GFX900-NEXT: s_mov_b32 s14, s16
9495 ; GFX900-NEXT: s_mov_b32 s15, s17
9496 ; GFX900-NEXT: ;;#ASMSTART
9497 ; GFX900-NEXT: ; use s[8:15]
9498 ; GFX900-NEXT: ;;#ASMEND
9499 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9501 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_0_0:
9503 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9504 ; GFX90A-NEXT: ;;#ASMSTART
9505 ; GFX90A-NEXT: ; def s[16:21]
9506 ; GFX90A-NEXT: ;;#ASMEND
9507 ; GFX90A-NEXT: ;;#ASMSTART
9508 ; GFX90A-NEXT: ; def s[4:9]
9509 ; GFX90A-NEXT: ;;#ASMEND
9510 ; GFX90A-NEXT: s_mov_b32 s10, s20
9511 ; GFX90A-NEXT: s_mov_b32 s11, s21
9512 ; GFX90A-NEXT: s_mov_b32 s12, s16
9513 ; GFX90A-NEXT: s_mov_b32 s13, s17
9514 ; GFX90A-NEXT: s_mov_b32 s14, s16
9515 ; GFX90A-NEXT: s_mov_b32 s15, s17
9516 ; GFX90A-NEXT: ;;#ASMSTART
9517 ; GFX90A-NEXT: ; use s[8:15]
9518 ; GFX90A-NEXT: ;;#ASMEND
9519 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9521 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_0_0:
9523 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9524 ; GFX940-NEXT: ;;#ASMSTART
9525 ; GFX940-NEXT: ; def s[8:13]
9526 ; GFX940-NEXT: ;;#ASMEND
9527 ; GFX940-NEXT: ;;#ASMSTART
9528 ; GFX940-NEXT: ; def s[0:5]
9529 ; GFX940-NEXT: ;;#ASMEND
9530 ; GFX940-NEXT: s_mov_b32 s8, s12
9531 ; GFX940-NEXT: s_mov_b32 s9, s13
9532 ; GFX940-NEXT: s_mov_b32 s10, s4
9533 ; GFX940-NEXT: s_mov_b32 s11, s5
9534 ; GFX940-NEXT: s_mov_b32 s12, s0
9535 ; GFX940-NEXT: s_mov_b32 s13, s1
9536 ; GFX940-NEXT: s_mov_b32 s14, s0
9537 ; GFX940-NEXT: s_mov_b32 s15, s1
9538 ; GFX940-NEXT: ;;#ASMSTART
9539 ; GFX940-NEXT: ; use s[8:15]
9540 ; GFX940-NEXT: ;;#ASMEND
9541 ; GFX940-NEXT: s_setpc_b64 s[30:31]
9542 %vec0 = call <3 x i64> asm "; def $0", "=s"()
9543 %vec1 = call <3 x i64> asm "; def $0", "=s"()
9544 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 0, i32 0>
9545 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR case with two asm-defined <3 x i64> operands, shuffle mask <5, 3, 0, 0>:
; result lane 0 is element 2 of %vec1, lane 1 is element 0 of %vec1, and lanes
; 2-3 repeat element 0 of %vec0. The result is pinned to s[8:15] by the
; inline-asm use. In the gfx940 expectations the second def is s[4:9], so lane 0
; (s[8:9]) needs no copy.
9549 define void @s_shuffle_v4i64_v3i64__5_3_0_0() {
9550 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_0_0:
9552 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9553 ; GFX900-NEXT: ;;#ASMSTART
9554 ; GFX900-NEXT: ; def s[4:9]
9555 ; GFX900-NEXT: ;;#ASMEND
9556 ; GFX900-NEXT: ;;#ASMSTART
9557 ; GFX900-NEXT: ; def s[12:17]
9558 ; GFX900-NEXT: ;;#ASMEND
9559 ; GFX900-NEXT: s_mov_b32 s8, s16
9560 ; GFX900-NEXT: s_mov_b32 s9, s17
9561 ; GFX900-NEXT: s_mov_b32 s10, s12
9562 ; GFX900-NEXT: s_mov_b32 s11, s13
9563 ; GFX900-NEXT: s_mov_b32 s12, s4
9564 ; GFX900-NEXT: s_mov_b32 s13, s5
9565 ; GFX900-NEXT: s_mov_b32 s14, s4
9566 ; GFX900-NEXT: s_mov_b32 s15, s5
9567 ; GFX900-NEXT: ;;#ASMSTART
9568 ; GFX900-NEXT: ; use s[8:15]
9569 ; GFX900-NEXT: ;;#ASMEND
9570 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9572 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_0_0:
9574 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9575 ; GFX90A-NEXT: ;;#ASMSTART
9576 ; GFX90A-NEXT: ; def s[4:9]
9577 ; GFX90A-NEXT: ;;#ASMEND
9578 ; GFX90A-NEXT: ;;#ASMSTART
9579 ; GFX90A-NEXT: ; def s[12:17]
9580 ; GFX90A-NEXT: ;;#ASMEND
9581 ; GFX90A-NEXT: s_mov_b32 s8, s16
9582 ; GFX90A-NEXT: s_mov_b32 s9, s17
9583 ; GFX90A-NEXT: s_mov_b32 s10, s12
9584 ; GFX90A-NEXT: s_mov_b32 s11, s13
9585 ; GFX90A-NEXT: s_mov_b32 s12, s4
9586 ; GFX90A-NEXT: s_mov_b32 s13, s5
9587 ; GFX90A-NEXT: s_mov_b32 s14, s4
9588 ; GFX90A-NEXT: s_mov_b32 s15, s5
9589 ; GFX90A-NEXT: ;;#ASMSTART
9590 ; GFX90A-NEXT: ; use s[8:15]
9591 ; GFX90A-NEXT: ;;#ASMEND
9592 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9594 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_0_0:
9596 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9597 ; GFX940-NEXT: ;;#ASMSTART
9598 ; GFX940-NEXT: ; def s[0:5]
9599 ; GFX940-NEXT: ;;#ASMEND
9600 ; GFX940-NEXT: s_mov_b32 s12, s0
9601 ; GFX940-NEXT: ;;#ASMSTART
9602 ; GFX940-NEXT: ; def s[4:9]
9603 ; GFX940-NEXT: ;;#ASMEND
9604 ; GFX940-NEXT: s_mov_b32 s10, s4
9605 ; GFX940-NEXT: s_mov_b32 s11, s5
9606 ; GFX940-NEXT: s_mov_b32 s13, s1
9607 ; GFX940-NEXT: s_mov_b32 s14, s0
9608 ; GFX940-NEXT: s_mov_b32 s15, s1
9609 ; GFX940-NEXT: ;;#ASMSTART
9610 ; GFX940-NEXT: ; use s[8:15]
9611 ; GFX940-NEXT: ;;#ASMEND
9612 ; GFX940-NEXT: s_setpc_b64 s[30:31]
9613 %vec0 = call <3 x i64> asm "; def $0", "=s"()
9614 %vec1 = call <3 x i64> asm "; def $0", "=s"()
9615 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 0, i32 0>
9616 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR case with two asm-defined <3 x i64> operands, shuffle mask <5, 4, 0, 0>:
; result lane 0 is element 2 of %vec1, lane 1 is element 1 of %vec1, and lanes
; 2-3 repeat element 0 of %vec0. Every expected sequence has a def in s[8:13],
; whose element 1 already occupies s[10:11], so no s_mov_b32 into s10 or s11 is
; expected.
; NOTE: the two "; def" asm strings are identical, so the expectations cannot
; tell which emitted def belongs to which IR value.
9620 define void @s_shuffle_v4i64_v3i64__5_4_0_0() {
9621 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_0_0:
9623 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9624 ; GFX900-NEXT: ;;#ASMSTART
9625 ; GFX900-NEXT: ; def s[4:9]
9626 ; GFX900-NEXT: ;;#ASMEND
9627 ; GFX900-NEXT: ;;#ASMSTART
9628 ; GFX900-NEXT: ; def s[8:13]
9629 ; GFX900-NEXT: ;;#ASMEND
9630 ; GFX900-NEXT: s_mov_b32 s8, s12
9631 ; GFX900-NEXT: s_mov_b32 s9, s13
9632 ; GFX900-NEXT: s_mov_b32 s12, s4
9633 ; GFX900-NEXT: s_mov_b32 s13, s5
9634 ; GFX900-NEXT: s_mov_b32 s14, s4
9635 ; GFX900-NEXT: s_mov_b32 s15, s5
9636 ; GFX900-NEXT: ;;#ASMSTART
9637 ; GFX900-NEXT: ; use s[8:15]
9638 ; GFX900-NEXT: ;;#ASMEND
9639 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9641 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_0_0:
9643 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9644 ; GFX90A-NEXT: ;;#ASMSTART
9645 ; GFX90A-NEXT: ; def s[4:9]
9646 ; GFX90A-NEXT: ;;#ASMEND
9647 ; GFX90A-NEXT: ;;#ASMSTART
9648 ; GFX90A-NEXT: ; def s[8:13]
9649 ; GFX90A-NEXT: ;;#ASMEND
9650 ; GFX90A-NEXT: s_mov_b32 s8, s12
9651 ; GFX90A-NEXT: s_mov_b32 s9, s13
9652 ; GFX90A-NEXT: s_mov_b32 s12, s4
9653 ; GFX90A-NEXT: s_mov_b32 s13, s5
9654 ; GFX90A-NEXT: s_mov_b32 s14, s4
9655 ; GFX90A-NEXT: s_mov_b32 s15, s5
9656 ; GFX90A-NEXT: ;;#ASMSTART
9657 ; GFX90A-NEXT: ; use s[8:15]
9658 ; GFX90A-NEXT: ;;#ASMEND
9659 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9661 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_4_0_0:
9663 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9664 ; GFX940-NEXT: ;;#ASMSTART
9665 ; GFX940-NEXT: ; def s[8:13]
9666 ; GFX940-NEXT: ;;#ASMEND
9667 ; GFX940-NEXT: ;;#ASMSTART
9668 ; GFX940-NEXT: ; def s[0:5]
9669 ; GFX940-NEXT: ;;#ASMEND
9670 ; GFX940-NEXT: s_mov_b32 s8, s12
9671 ; GFX940-NEXT: s_mov_b32 s9, s13
9672 ; GFX940-NEXT: s_mov_b32 s12, s0
9673 ; GFX940-NEXT: s_mov_b32 s13, s1
9674 ; GFX940-NEXT: s_mov_b32 s14, s0
9675 ; GFX940-NEXT: s_mov_b32 s15, s1
9676 ; GFX940-NEXT: ;;#ASMSTART
9677 ; GFX940-NEXT: ; use s[8:15]
9678 ; GFX940-NEXT: ;;#ASMEND
9679 ; GFX940-NEXT: s_setpc_b64 s[30:31]
9680 %vec0 = call <3 x i64> asm "; def $0", "=s"()
9681 %vec1 = call <3 x i64> asm "; def $0", "=s"()
9682 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 0, i32 0>
9683 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR case with two asm-defined <3 x i64> operands, shuffle mask <5, 5, 0, 0>:
; result lanes 0-1 both take element 2 of %vec1 and lanes 2-3 both take element
; 0 of %vec0. The result is pinned to s[8:15] by the inline-asm use; every
; expected sequence copies all eight dwords.
9687 define void @s_shuffle_v4i64_v3i64__5_5_0_0() {
9688 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_0:
9690 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9691 ; GFX900-NEXT: ;;#ASMSTART
9692 ; GFX900-NEXT: ; def s[4:9]
9693 ; GFX900-NEXT: ;;#ASMEND
9694 ; GFX900-NEXT: ;;#ASMSTART
9695 ; GFX900-NEXT: ; def s[8:13]
9696 ; GFX900-NEXT: ;;#ASMEND
9697 ; GFX900-NEXT: s_mov_b32 s8, s12
9698 ; GFX900-NEXT: s_mov_b32 s9, s13
9699 ; GFX900-NEXT: s_mov_b32 s10, s12
9700 ; GFX900-NEXT: s_mov_b32 s11, s13
9701 ; GFX900-NEXT: s_mov_b32 s12, s4
9702 ; GFX900-NEXT: s_mov_b32 s13, s5
9703 ; GFX900-NEXT: s_mov_b32 s14, s4
9704 ; GFX900-NEXT: s_mov_b32 s15, s5
9705 ; GFX900-NEXT: ;;#ASMSTART
9706 ; GFX900-NEXT: ; use s[8:15]
9707 ; GFX900-NEXT: ;;#ASMEND
9708 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9710 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_0:
9712 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9713 ; GFX90A-NEXT: ;;#ASMSTART
9714 ; GFX90A-NEXT: ; def s[4:9]
9715 ; GFX90A-NEXT: ;;#ASMEND
9716 ; GFX90A-NEXT: ;;#ASMSTART
9717 ; GFX90A-NEXT: ; def s[8:13]
9718 ; GFX90A-NEXT: ;;#ASMEND
9719 ; GFX90A-NEXT: s_mov_b32 s8, s12
9720 ; GFX90A-NEXT: s_mov_b32 s9, s13
9721 ; GFX90A-NEXT: s_mov_b32 s10, s12
9722 ; GFX90A-NEXT: s_mov_b32 s11, s13
9723 ; GFX90A-NEXT: s_mov_b32 s12, s4
9724 ; GFX90A-NEXT: s_mov_b32 s13, s5
9725 ; GFX90A-NEXT: s_mov_b32 s14, s4
9726 ; GFX90A-NEXT: s_mov_b32 s15, s5
9727 ; GFX90A-NEXT: ;;#ASMSTART
9728 ; GFX90A-NEXT: ; use s[8:15]
9729 ; GFX90A-NEXT: ;;#ASMEND
9730 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9732 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_0:
9734 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9735 ; GFX940-NEXT: ;;#ASMSTART
9736 ; GFX940-NEXT: ; def s[8:13]
9737 ; GFX940-NEXT: ;;#ASMEND
9738 ; GFX940-NEXT: ;;#ASMSTART
9739 ; GFX940-NEXT: ; def s[0:5]
9740 ; GFX940-NEXT: ;;#ASMEND
9741 ; GFX940-NEXT: s_mov_b32 s8, s12
9742 ; GFX940-NEXT: s_mov_b32 s9, s13
9743 ; GFX940-NEXT: s_mov_b32 s10, s12
9744 ; GFX940-NEXT: s_mov_b32 s11, s13
9745 ; GFX940-NEXT: s_mov_b32 s12, s0
9746 ; GFX940-NEXT: s_mov_b32 s13, s1
9747 ; GFX940-NEXT: s_mov_b32 s14, s0
9748 ; GFX940-NEXT: s_mov_b32 s15, s1
9749 ; GFX940-NEXT: ;;#ASMSTART
9750 ; GFX940-NEXT: ; use s[8:15]
9751 ; GFX940-NEXT: ;;#ASMEND
9752 ; GFX940-NEXT: s_setpc_b64 s[30:31]
9753 %vec0 = call <3 x i64> asm "; def $0", "=s"()
9754 %vec1 = call <3 x i64> asm "; def $0", "=s"()
9755 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 0>
9756 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR case with two asm-defined <3 x i64> operands, shuffle mask
; <5, 5, poison, 0>: result lanes 0-1 both take element 2 of %vec1, lane 2 is
; poison, and lane 3 takes element 0 of %vec0. Because lane 2 is poison, none of
; the expected sequences contains an s_mov_b32 into s12 or s13.
9760 define void @s_shuffle_v4i64_v3i64__5_5_u_0() {
9761 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_0:
9763 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9764 ; GFX900-NEXT: ;;#ASMSTART
9765 ; GFX900-NEXT: ; def s[4:9]
9766 ; GFX900-NEXT: ;;#ASMEND
9767 ; GFX900-NEXT: ;;#ASMSTART
9768 ; GFX900-NEXT: ; def s[8:13]
9769 ; GFX900-NEXT: ;;#ASMEND
9770 ; GFX900-NEXT: s_mov_b32 s8, s12
9771 ; GFX900-NEXT: s_mov_b32 s9, s13
9772 ; GFX900-NEXT: s_mov_b32 s10, s12
9773 ; GFX900-NEXT: s_mov_b32 s11, s13
9774 ; GFX900-NEXT: s_mov_b32 s14, s4
9775 ; GFX900-NEXT: s_mov_b32 s15, s5
9776 ; GFX900-NEXT: ;;#ASMSTART
9777 ; GFX900-NEXT: ; use s[8:15]
9778 ; GFX900-NEXT: ;;#ASMEND
9779 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9781 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_0:
9783 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9784 ; GFX90A-NEXT: ;;#ASMSTART
9785 ; GFX90A-NEXT: ; def s[4:9]
9786 ; GFX90A-NEXT: ;;#ASMEND
9787 ; GFX90A-NEXT: ;;#ASMSTART
9788 ; GFX90A-NEXT: ; def s[8:13]
9789 ; GFX90A-NEXT: ;;#ASMEND
9790 ; GFX90A-NEXT: s_mov_b32 s8, s12
9791 ; GFX90A-NEXT: s_mov_b32 s9, s13
9792 ; GFX90A-NEXT: s_mov_b32 s10, s12
9793 ; GFX90A-NEXT: s_mov_b32 s11, s13
9794 ; GFX90A-NEXT: s_mov_b32 s14, s4
9795 ; GFX90A-NEXT: s_mov_b32 s15, s5
9796 ; GFX90A-NEXT: ;;#ASMSTART
9797 ; GFX90A-NEXT: ; use s[8:15]
9798 ; GFX90A-NEXT: ;;#ASMEND
9799 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9801 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_0:
9803 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9804 ; GFX940-NEXT: ;;#ASMSTART
9805 ; GFX940-NEXT: ; def s[8:13]
9806 ; GFX940-NEXT: ;;#ASMEND
9807 ; GFX940-NEXT: ;;#ASMSTART
9808 ; GFX940-NEXT: ; def s[0:5]
9809 ; GFX940-NEXT: ;;#ASMEND
9810 ; GFX940-NEXT: s_mov_b32 s8, s12
9811 ; GFX940-NEXT: s_mov_b32 s9, s13
9812 ; GFX940-NEXT: s_mov_b32 s10, s12
9813 ; GFX940-NEXT: s_mov_b32 s11, s13
9814 ; GFX940-NEXT: s_mov_b32 s14, s0
9815 ; GFX940-NEXT: s_mov_b32 s15, s1
9816 ; GFX940-NEXT: ;;#ASMSTART
9817 ; GFX940-NEXT: ; use s[8:15]
9818 ; GFX940-NEXT: ;;#ASMEND
9819 ; GFX940-NEXT: s_setpc_b64 s[30:31]
9820 %vec0 = call <3 x i64> asm "; def $0", "=s"()
9821 %vec1 = call <3 x i64> asm "; def $0", "=s"()
9822 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 0>
9823 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR case with two asm-defined <3 x i64> operands, shuffle mask <5, 5, 1, 0>:
; result lanes 0-1 both take element 2 of %vec1, lane 2 takes element 1 of
; %vec0, and lane 3 takes element 0 of %vec0. The result is pinned to s[8:15] by
; the inline-asm use; every expected sequence copies all eight dwords.
9827 define void @s_shuffle_v4i64_v3i64__5_5_1_0() {
9828 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_0:
9830 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9831 ; GFX900-NEXT: ;;#ASMSTART
9832 ; GFX900-NEXT: ; def s[4:9]
9833 ; GFX900-NEXT: ;;#ASMEND
9834 ; GFX900-NEXT: ;;#ASMSTART
9835 ; GFX900-NEXT: ; def s[8:13]
9836 ; GFX900-NEXT: ;;#ASMEND
9837 ; GFX900-NEXT: s_mov_b32 s8, s12
9838 ; GFX900-NEXT: s_mov_b32 s9, s13
9839 ; GFX900-NEXT: s_mov_b32 s10, s12
9840 ; GFX900-NEXT: s_mov_b32 s11, s13
9841 ; GFX900-NEXT: s_mov_b32 s12, s6
9842 ; GFX900-NEXT: s_mov_b32 s13, s7
9843 ; GFX900-NEXT: s_mov_b32 s14, s4
9844 ; GFX900-NEXT: s_mov_b32 s15, s5
9845 ; GFX900-NEXT: ;;#ASMSTART
9846 ; GFX900-NEXT: ; use s[8:15]
9847 ; GFX900-NEXT: ;;#ASMEND
9848 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9850 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_0:
9852 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9853 ; GFX90A-NEXT: ;;#ASMSTART
9854 ; GFX90A-NEXT: ; def s[4:9]
9855 ; GFX90A-NEXT: ;;#ASMEND
9856 ; GFX90A-NEXT: ;;#ASMSTART
9857 ; GFX90A-NEXT: ; def s[8:13]
9858 ; GFX90A-NEXT: ;;#ASMEND
9859 ; GFX90A-NEXT: s_mov_b32 s8, s12
9860 ; GFX90A-NEXT: s_mov_b32 s9, s13
9861 ; GFX90A-NEXT: s_mov_b32 s10, s12
9862 ; GFX90A-NEXT: s_mov_b32 s11, s13
9863 ; GFX90A-NEXT: s_mov_b32 s12, s6
9864 ; GFX90A-NEXT: s_mov_b32 s13, s7
9865 ; GFX90A-NEXT: s_mov_b32 s14, s4
9866 ; GFX90A-NEXT: s_mov_b32 s15, s5
9867 ; GFX90A-NEXT: ;;#ASMSTART
9868 ; GFX90A-NEXT: ; use s[8:15]
9869 ; GFX90A-NEXT: ;;#ASMEND
9870 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9872 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_0:
9874 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9875 ; GFX940-NEXT: ;;#ASMSTART
9876 ; GFX940-NEXT: ; def s[8:13]
9877 ; GFX940-NEXT: ;;#ASMEND
9878 ; GFX940-NEXT: ;;#ASMSTART
9879 ; GFX940-NEXT: ; def s[0:5]
9880 ; GFX940-NEXT: ;;#ASMEND
9881 ; GFX940-NEXT: s_mov_b32 s8, s12
9882 ; GFX940-NEXT: s_mov_b32 s9, s13
9883 ; GFX940-NEXT: s_mov_b32 s10, s12
9884 ; GFX940-NEXT: s_mov_b32 s11, s13
9885 ; GFX940-NEXT: s_mov_b32 s12, s2
9886 ; GFX940-NEXT: s_mov_b32 s13, s3
9887 ; GFX940-NEXT: s_mov_b32 s14, s0
9888 ; GFX940-NEXT: s_mov_b32 s15, s1
9889 ; GFX940-NEXT: ;;#ASMSTART
9890 ; GFX940-NEXT: ; use s[8:15]
9891 ; GFX940-NEXT: ;;#ASMEND
9892 ; GFX940-NEXT: s_setpc_b64 s[30:31]
9893 %vec0 = call <3 x i64> asm "; def $0", "=s"()
9894 %vec1 = call <3 x i64> asm "; def $0", "=s"()
9895 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 0>
9896 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR case with two asm-defined <3 x i64> operands, shuffle mask <5, 5, 2, 0>:
; result lanes 0-1 both take element 2 of %vec1, lane 2 takes element 2 of
; %vec0, and lane 3 takes element 0 of %vec0. The result is pinned to s[8:15] by
; the inline-asm use; every expected sequence copies all eight dwords.
; NOTE: the two "; def" asm strings are identical, so the expectations cannot
; tell which emitted def belongs to which IR value.
9900 define void @s_shuffle_v4i64_v3i64__5_5_2_0() {
9901 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_0:
9903 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9904 ; GFX900-NEXT: ;;#ASMSTART
9905 ; GFX900-NEXT: ; def s[8:13]
9906 ; GFX900-NEXT: ;;#ASMEND
9907 ; GFX900-NEXT: ;;#ASMSTART
9908 ; GFX900-NEXT: ; def s[16:21]
9909 ; GFX900-NEXT: ;;#ASMEND
9910 ; GFX900-NEXT: s_mov_b32 s8, s12
9911 ; GFX900-NEXT: s_mov_b32 s9, s13
9912 ; GFX900-NEXT: s_mov_b32 s10, s12
9913 ; GFX900-NEXT: s_mov_b32 s11, s13
9914 ; GFX900-NEXT: s_mov_b32 s12, s20
9915 ; GFX900-NEXT: s_mov_b32 s13, s21
9916 ; GFX900-NEXT: s_mov_b32 s14, s16
9917 ; GFX900-NEXT: s_mov_b32 s15, s17
9918 ; GFX900-NEXT: ;;#ASMSTART
9919 ; GFX900-NEXT: ; use s[8:15]
9920 ; GFX900-NEXT: ;;#ASMEND
9921 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9923 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_0:
9925 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9926 ; GFX90A-NEXT: ;;#ASMSTART
9927 ; GFX90A-NEXT: ; def s[8:13]
9928 ; GFX90A-NEXT: ;;#ASMEND
9929 ; GFX90A-NEXT: ;;#ASMSTART
9930 ; GFX90A-NEXT: ; def s[16:21]
9931 ; GFX90A-NEXT: ;;#ASMEND
9932 ; GFX90A-NEXT: s_mov_b32 s8, s12
9933 ; GFX90A-NEXT: s_mov_b32 s9, s13
9934 ; GFX90A-NEXT: s_mov_b32 s10, s12
9935 ; GFX90A-NEXT: s_mov_b32 s11, s13
9936 ; GFX90A-NEXT: s_mov_b32 s12, s20
9937 ; GFX90A-NEXT: s_mov_b32 s13, s21
9938 ; GFX90A-NEXT: s_mov_b32 s14, s16
9939 ; GFX90A-NEXT: s_mov_b32 s15, s17
9940 ; GFX90A-NEXT: ;;#ASMSTART
9941 ; GFX90A-NEXT: ; use s[8:15]
9942 ; GFX90A-NEXT: ;;#ASMEND
9943 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9945 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_0:
9947 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9948 ; GFX940-NEXT: ;;#ASMSTART
9949 ; GFX940-NEXT: ; def s[8:13]
9950 ; GFX940-NEXT: ;;#ASMEND
9951 ; GFX940-NEXT: ;;#ASMSTART
9952 ; GFX940-NEXT: ; def s[0:5]
9953 ; GFX940-NEXT: ;;#ASMEND
9954 ; GFX940-NEXT: s_mov_b32 s8, s12
9955 ; GFX940-NEXT: s_mov_b32 s9, s13
9956 ; GFX940-NEXT: s_mov_b32 s10, s12
9957 ; GFX940-NEXT: s_mov_b32 s11, s13
9958 ; GFX940-NEXT: s_mov_b32 s12, s4
9959 ; GFX940-NEXT: s_mov_b32 s13, s5
9960 ; GFX940-NEXT: s_mov_b32 s14, s0
9961 ; GFX940-NEXT: s_mov_b32 s15, s1
9962 ; GFX940-NEXT: ;;#ASMSTART
9963 ; GFX940-NEXT: ; use s[8:15]
9964 ; GFX940-NEXT: ;;#ASMEND
9965 ; GFX940-NEXT: s_setpc_b64 s[30:31]
9966 %vec0 = call <3 x i64> asm "; def $0", "=s"()
9967 %vec1 = call <3 x i64> asm "; def $0", "=s"()
9968 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 0>
9969 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR case with two asm-defined <3 x i64> operands, shuffle mask <5, 5, 3, 0>:
; result lanes 0-1 both take element 2 of %vec1, lane 2 takes element 0 of
; %vec1, and lane 3 takes element 0 of %vec0. Every expected sequence has a def
; in s[12:17], whose element 0 already occupies s[12:13], so no s_mov_b32 into
; s12 or s13 is expected.
; NOTE: the two "; def" asm strings are identical, so the expectations cannot
; tell which emitted def belongs to which IR value.
9973 define void @s_shuffle_v4i64_v3i64__5_5_3_0() {
9974 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_0:
9976 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9977 ; GFX900-NEXT: ;;#ASMSTART
9978 ; GFX900-NEXT: ; def s[4:9]
9979 ; GFX900-NEXT: ;;#ASMEND
9980 ; GFX900-NEXT: ;;#ASMSTART
9981 ; GFX900-NEXT: ; def s[12:17]
9982 ; GFX900-NEXT: ;;#ASMEND
9983 ; GFX900-NEXT: s_mov_b32 s8, s16
9984 ; GFX900-NEXT: s_mov_b32 s9, s17
9985 ; GFX900-NEXT: s_mov_b32 s10, s16
9986 ; GFX900-NEXT: s_mov_b32 s11, s17
9987 ; GFX900-NEXT: s_mov_b32 s14, s4
9988 ; GFX900-NEXT: s_mov_b32 s15, s5
9989 ; GFX900-NEXT: ;;#ASMSTART
9990 ; GFX900-NEXT: ; use s[8:15]
9991 ; GFX900-NEXT: ;;#ASMEND
9992 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9994 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_0:
9996 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9997 ; GFX90A-NEXT: ;;#ASMSTART
9998 ; GFX90A-NEXT: ; def s[4:9]
9999 ; GFX90A-NEXT: ;;#ASMEND
10000 ; GFX90A-NEXT: ;;#ASMSTART
10001 ; GFX90A-NEXT: ; def s[12:17]
10002 ; GFX90A-NEXT: ;;#ASMEND
10003 ; GFX90A-NEXT: s_mov_b32 s8, s16
10004 ; GFX90A-NEXT: s_mov_b32 s9, s17
10005 ; GFX90A-NEXT: s_mov_b32 s10, s16
10006 ; GFX90A-NEXT: s_mov_b32 s11, s17
10007 ; GFX90A-NEXT: s_mov_b32 s14, s4
10008 ; GFX90A-NEXT: s_mov_b32 s15, s5
10009 ; GFX90A-NEXT: ;;#ASMSTART
10010 ; GFX90A-NEXT: ; use s[8:15]
10011 ; GFX90A-NEXT: ;;#ASMEND
10012 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10014 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_0:
10016 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10017 ; GFX940-NEXT: ;;#ASMSTART
10018 ; GFX940-NEXT: ; def s[12:17]
10019 ; GFX940-NEXT: ;;#ASMEND
10020 ; GFX940-NEXT: ;;#ASMSTART
10021 ; GFX940-NEXT: ; def s[0:5]
10022 ; GFX940-NEXT: ;;#ASMEND
10023 ; GFX940-NEXT: s_mov_b32 s8, s16
10024 ; GFX940-NEXT: s_mov_b32 s9, s17
10025 ; GFX940-NEXT: s_mov_b32 s10, s16
10026 ; GFX940-NEXT: s_mov_b32 s11, s17
10027 ; GFX940-NEXT: s_mov_b32 s14, s0
10028 ; GFX940-NEXT: s_mov_b32 s15, s1
10029 ; GFX940-NEXT: ;;#ASMSTART
10030 ; GFX940-NEXT: ; use s[8:15]
10031 ; GFX940-NEXT: ;;#ASMEND
10032 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10033 %vec0 = call <3 x i64> asm "; def $0", "=s"()
10034 %vec1 = call <3 x i64> asm "; def $0", "=s"()
10035 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 0>
10036 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test: builds a <4 x i64> from two inline-asm-defined <3 x i64>
; values with mask <5, 5, 4, 0> and hands it to an asm use pinned to s[8:15].
; Expected llc output is recorded separately for gfx900, gfx90a and gfx940.
10040 define void @s_shuffle_v4i64_v3i64__5_5_4_0() {
10041 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_0:
10043 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10044 ; GFX900-NEXT: ;;#ASMSTART
10045 ; GFX900-NEXT: ; def s[4:9]
10046 ; GFX900-NEXT: ;;#ASMEND
10047 ; GFX900-NEXT: ;;#ASMSTART
10048 ; GFX900-NEXT: ; def s[12:17]
10049 ; GFX900-NEXT: ;;#ASMEND
10050 ; GFX900-NEXT: s_mov_b32 s8, s16
10051 ; GFX900-NEXT: s_mov_b32 s9, s17
10052 ; GFX900-NEXT: s_mov_b32 s10, s16
10053 ; GFX900-NEXT: s_mov_b32 s11, s17
10054 ; GFX900-NEXT: s_mov_b32 s12, s14
10055 ; GFX900-NEXT: s_mov_b32 s13, s15
10056 ; GFX900-NEXT: s_mov_b32 s14, s4
10057 ; GFX900-NEXT: s_mov_b32 s15, s5
10058 ; GFX900-NEXT: ;;#ASMSTART
10059 ; GFX900-NEXT: ; use s[8:15]
10060 ; GFX900-NEXT: ;;#ASMEND
10061 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10063 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_0:
10065 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10066 ; GFX90A-NEXT: ;;#ASMSTART
10067 ; GFX90A-NEXT: ; def s[4:9]
10068 ; GFX90A-NEXT: ;;#ASMEND
10069 ; GFX90A-NEXT: ;;#ASMSTART
10070 ; GFX90A-NEXT: ; def s[12:17]
10071 ; GFX90A-NEXT: ;;#ASMEND
10072 ; GFX90A-NEXT: s_mov_b32 s8, s16
10073 ; GFX90A-NEXT: s_mov_b32 s9, s17
10074 ; GFX90A-NEXT: s_mov_b32 s10, s16
10075 ; GFX90A-NEXT: s_mov_b32 s11, s17
10076 ; GFX90A-NEXT: s_mov_b32 s12, s14
10077 ; GFX90A-NEXT: s_mov_b32 s13, s15
10078 ; GFX90A-NEXT: s_mov_b32 s14, s4
10079 ; GFX90A-NEXT: s_mov_b32 s15, s5
10080 ; GFX90A-NEXT: ;;#ASMSTART
10081 ; GFX90A-NEXT: ; use s[8:15]
10082 ; GFX90A-NEXT: ;;#ASMEND
10083 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10085 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_0:
10087 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10088 ; GFX940-NEXT: ;;#ASMSTART
10089 ; GFX940-NEXT: ; def s[12:17]
10090 ; GFX940-NEXT: ;;#ASMEND
10091 ; GFX940-NEXT: ;;#ASMSTART
10092 ; GFX940-NEXT: ; def s[0:5]
10093 ; GFX940-NEXT: ;;#ASMEND
10094 ; GFX940-NEXT: s_mov_b32 s8, s16
10095 ; GFX940-NEXT: s_mov_b32 s9, s17
10096 ; GFX940-NEXT: s_mov_b32 s10, s16
10097 ; GFX940-NEXT: s_mov_b32 s11, s17
10098 ; GFX940-NEXT: s_mov_b32 s12, s14
10099 ; GFX940-NEXT: s_mov_b32 s13, s15
10100 ; GFX940-NEXT: s_mov_b32 s14, s0
10101 ; GFX940-NEXT: s_mov_b32 s15, s1
10102 ; GFX940-NEXT: ;;#ASMSTART
10103 ; GFX940-NEXT: ; use s[8:15]
10104 ; GFX940-NEXT: ;;#ASMEND
10105 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from "=s" inline asm so they live in SGPRs.
10106 %vec0 = call <3 x i64> asm "; def $0", "=s"()
10107 %vec1 = call <3 x i64> asm "; def $0", "=s"()
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the result is
; <%vec1[2], %vec1[2], %vec1[1], %vec0[0]>.
10108 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 0>
; The use constraint forces the shuffled value into s[8:15].
10109 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test with one inline-asm-defined <3 x i64> source (the second
; shuffle operand is poison) and mask <poison, 1, 1, 1>. The result feeds an
; asm use pinned to s[8:15]. A single check sequence under the common gfx9
; prefix covers every tested subtarget: the source is defined in s[8:13], and
; s[10:11] is copied into s[12:13] and s[14:15].
10113 define void @s_shuffle_v4i64_v3i64__u_1_1_1() {
10114 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_1_1_1:
10116 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10117 ; GFX9-NEXT: ;;#ASMSTART
10118 ; GFX9-NEXT: ; def s[8:13]
10119 ; GFX9-NEXT: ;;#ASMEND
10120 ; GFX9-NEXT: s_mov_b32 s12, s10
10121 ; GFX9-NEXT: s_mov_b32 s13, s11
10122 ; GFX9-NEXT: s_mov_b32 s14, s10
10123 ; GFX9-NEXT: s_mov_b32 s15, s11
10124 ; GFX9-NEXT: ;;#ASMSTART
10125 ; GFX9-NEXT: ; use s[8:15]
10126 ; GFX9-NEXT: ;;#ASMEND
10127 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10128 %vec0 = call <3 x i64> asm "; def $0", "=s"()
; Lane 0 is poison; lanes 1-3 all replicate %vec0[1].
10129 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
; The use constraint forces the shuffled value into s[8:15].
10130 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test with one inline-asm-defined <3 x i64> source (the second
; shuffle operand is poison) and mask <0, 1, 1, 1>. The result feeds an asm
; use pinned to s[8:15]. A single check sequence under the common gfx9 prefix
; covers every tested subtarget: the source is defined in s[8:13], so only
; s[12:13] and s[14:15] need copies (both from s[10:11]).
10134 define void @s_shuffle_v4i64_v3i64__0_1_1_1() {
10135 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_1_1_1:
10137 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10138 ; GFX9-NEXT: ;;#ASMSTART
10139 ; GFX9-NEXT: ; def s[8:13]
10140 ; GFX9-NEXT: ;;#ASMEND
10141 ; GFX9-NEXT: s_mov_b32 s12, s10
10142 ; GFX9-NEXT: s_mov_b32 s13, s11
10143 ; GFX9-NEXT: s_mov_b32 s14, s10
10144 ; GFX9-NEXT: s_mov_b32 s15, s11
10145 ; GFX9-NEXT: ;;#ASMSTART
10146 ; GFX9-NEXT: ; use s[8:15]
10147 ; GFX9-NEXT: ;;#ASMEND
10148 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10149 %vec0 = call <3 x i64> asm "; def $0", "=s"()
; Result is <%vec0[0], %vec0[1], %vec0[1], %vec0[1]>.
10150 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
; The use constraint forces the shuffled value into s[8:15].
10151 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test with one inline-asm-defined <3 x i64> source (the second
; shuffle operand is poison) and the splat mask <1, 1, 1, 1>. The result
; feeds an asm use pinned to s[8:15]. A single check sequence under the
; common gfx9 prefix covers every tested subtarget: the source is defined in
; s[8:13], and s[10:11] is copied into s[8:9], s[12:13] and s[14:15].
10155 define void @s_shuffle_v4i64_v3i64__1_1_1_1() {
10156 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_1_1_1:
10158 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10159 ; GFX9-NEXT: ;;#ASMSTART
10160 ; GFX9-NEXT: ; def s[8:13]
10161 ; GFX9-NEXT: ;;#ASMEND
10162 ; GFX9-NEXT: s_mov_b32 s8, s10
10163 ; GFX9-NEXT: s_mov_b32 s9, s11
10164 ; GFX9-NEXT: s_mov_b32 s12, s10
10165 ; GFX9-NEXT: s_mov_b32 s13, s11
10166 ; GFX9-NEXT: s_mov_b32 s14, s10
10167 ; GFX9-NEXT: s_mov_b32 s15, s11
10168 ; GFX9-NEXT: ;;#ASMSTART
10169 ; GFX9-NEXT: ; use s[8:15]
10170 ; GFX9-NEXT: ;;#ASMEND
10171 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10172 %vec0 = call <3 x i64> asm "; def $0", "=s"()
; Every result lane is %vec0[1].
10173 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; The use constraint forces the shuffled value into s[8:15].
10174 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test with one inline-asm-defined <3 x i64> source (the second
; shuffle operand is poison) and mask <2, 1, 1, 1>. The result feeds an asm
; use pinned to s[8:15]. A single check sequence under the common gfx9 prefix
; covers every tested subtarget. The source is defined in s[8:13]; the
; expected code reads s[12:13] into s[8:9] before overwriting s[12:13] with
; s[10:11].
10178 define void @s_shuffle_v4i64_v3i64__2_1_1_1() {
10179 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_1_1_1:
10181 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10182 ; GFX9-NEXT: ;;#ASMSTART
10183 ; GFX9-NEXT: ; def s[8:13]
10184 ; GFX9-NEXT: ;;#ASMEND
10185 ; GFX9-NEXT: s_mov_b32 s8, s12
10186 ; GFX9-NEXT: s_mov_b32 s9, s13
10187 ; GFX9-NEXT: s_mov_b32 s12, s10
10188 ; GFX9-NEXT: s_mov_b32 s13, s11
10189 ; GFX9-NEXT: s_mov_b32 s14, s10
10190 ; GFX9-NEXT: s_mov_b32 s15, s11
10191 ; GFX9-NEXT: ;;#ASMSTART
10192 ; GFX9-NEXT: ; use s[8:15]
10193 ; GFX9-NEXT: ;;#ASMEND
10194 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10195 %vec0 = call <3 x i64> asm "; def $0", "=s"()
; Result is <%vec0[2], %vec0[1], %vec0[1], %vec0[1]>.
10196 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
; The use constraint forces the shuffled value into s[8:15].
10197 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test with one inline-asm-defined <3 x i64> source (the second
; shuffle operand is poison) and mask <3, 1, 1, 1>. The result feeds an asm
; use pinned to s[8:15]. A single check sequence under the common gfx9 prefix
; covers every tested subtarget; it contains no copy into s[8:9], only copies
; of s[10:11] into s[12:13] and s[14:15].
10201 define void @s_shuffle_v4i64_v3i64__3_1_1_1() {
10202 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_1_1_1:
10204 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10205 ; GFX9-NEXT: ;;#ASMSTART
10206 ; GFX9-NEXT: ; def s[8:13]
10207 ; GFX9-NEXT: ;;#ASMEND
10208 ; GFX9-NEXT: s_mov_b32 s12, s10
10209 ; GFX9-NEXT: s_mov_b32 s13, s11
10210 ; GFX9-NEXT: s_mov_b32 s14, s10
10211 ; GFX9-NEXT: s_mov_b32 s15, s11
10212 ; GFX9-NEXT: ;;#ASMSTART
10213 ; GFX9-NEXT: ; use s[8:15]
10214 ; GFX9-NEXT: ;;#ASMEND
10215 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10216 %vec0 = call <3 x i64> asm "; def $0", "=s"()
; Mask index 3 addresses element 0 of the second operand, which is poison
; here; lanes 1-3 replicate %vec0[1].
10217 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
; The use constraint forces the shuffled value into s[8:15].
10218 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test: builds a <4 x i64> from two inline-asm-defined <3 x i64>
; values with mask <4, 1, 1, 1> and hands it to an asm use pinned to s[8:15].
; Expected llc output is recorded separately for gfx900, gfx90a and gfx940.
10222 define void @s_shuffle_v4i64_v3i64__4_1_1_1() {
10223 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1:
10225 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10226 ; GFX900-NEXT: ;;#ASMSTART
10227 ; GFX900-NEXT: ; def s[8:13]
10228 ; GFX900-NEXT: ;;#ASMEND
10229 ; GFX900-NEXT: ;;#ASMSTART
10230 ; GFX900-NEXT: ; def s[4:9]
10231 ; GFX900-NEXT: ;;#ASMEND
10232 ; GFX900-NEXT: s_mov_b32 s8, s6
10233 ; GFX900-NEXT: s_mov_b32 s9, s7
10234 ; GFX900-NEXT: s_mov_b32 s12, s10
10235 ; GFX900-NEXT: s_mov_b32 s13, s11
10236 ; GFX900-NEXT: s_mov_b32 s14, s10
10237 ; GFX900-NEXT: s_mov_b32 s15, s11
10238 ; GFX900-NEXT: ;;#ASMSTART
10239 ; GFX900-NEXT: ; use s[8:15]
10240 ; GFX900-NEXT: ;;#ASMEND
10241 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10243 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1:
10245 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10246 ; GFX90A-NEXT: ;;#ASMSTART
10247 ; GFX90A-NEXT: ; def s[8:13]
10248 ; GFX90A-NEXT: ;;#ASMEND
10249 ; GFX90A-NEXT: ;;#ASMSTART
10250 ; GFX90A-NEXT: ; def s[4:9]
10251 ; GFX90A-NEXT: ;;#ASMEND
10252 ; GFX90A-NEXT: s_mov_b32 s8, s6
10253 ; GFX90A-NEXT: s_mov_b32 s9, s7
10254 ; GFX90A-NEXT: s_mov_b32 s12, s10
10255 ; GFX90A-NEXT: s_mov_b32 s13, s11
10256 ; GFX90A-NEXT: s_mov_b32 s14, s10
10257 ; GFX90A-NEXT: s_mov_b32 s15, s11
10258 ; GFX90A-NEXT: ;;#ASMSTART
10259 ; GFX90A-NEXT: ; use s[8:15]
10260 ; GFX90A-NEXT: ;;#ASMEND
10261 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10263 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1:
10265 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10266 ; GFX940-NEXT: ;;#ASMSTART
10267 ; GFX940-NEXT: ; def s[8:13]
10268 ; GFX940-NEXT: ;;#ASMEND
10269 ; GFX940-NEXT: ;;#ASMSTART
10270 ; GFX940-NEXT: ; def s[0:5]
10271 ; GFX940-NEXT: ;;#ASMEND
10272 ; GFX940-NEXT: s_mov_b32 s8, s2
10273 ; GFX940-NEXT: s_mov_b32 s9, s3
10274 ; GFX940-NEXT: s_mov_b32 s12, s10
10275 ; GFX940-NEXT: s_mov_b32 s13, s11
10276 ; GFX940-NEXT: s_mov_b32 s14, s10
10277 ; GFX940-NEXT: s_mov_b32 s15, s11
10278 ; GFX940-NEXT: ;;#ASMSTART
10279 ; GFX940-NEXT: ; use s[8:15]
10280 ; GFX940-NEXT: ;;#ASMEND
10281 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from "=s" inline asm so they live in SGPRs.
10282 %vec0 = call <3 x i64> asm "; def $0", "=s"()
10283 %vec1 = call <3 x i64> asm "; def $0", "=s"()
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the result is
; <%vec1[1], %vec0[1], %vec0[1], %vec0[1]>.
10284 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
; The use constraint forces the shuffled value into s[8:15].
10285 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test: builds a <4 x i64> from two inline-asm-defined <3 x i64>
; values with mask <5, 1, 1, 1> and hands it to an asm use pinned to s[8:15].
; Expected llc output is recorded separately for gfx900, gfx90a and gfx940.
10289 define void @s_shuffle_v4i64_v3i64__5_1_1_1() {
10290 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_1_1:
10292 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10293 ; GFX900-NEXT: ;;#ASMSTART
10294 ; GFX900-NEXT: ; def s[8:13]
10295 ; GFX900-NEXT: ;;#ASMEND
10296 ; GFX900-NEXT: ;;#ASMSTART
10297 ; GFX900-NEXT: ; def s[4:9]
10298 ; GFX900-NEXT: ;;#ASMEND
10299 ; GFX900-NEXT: s_mov_b32 s12, s10
10300 ; GFX900-NEXT: s_mov_b32 s13, s11
10301 ; GFX900-NEXT: s_mov_b32 s14, s10
10302 ; GFX900-NEXT: s_mov_b32 s15, s11
10303 ; GFX900-NEXT: ;;#ASMSTART
10304 ; GFX900-NEXT: ; use s[8:15]
10305 ; GFX900-NEXT: ;;#ASMEND
10306 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10308 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_1_1:
10310 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10311 ; GFX90A-NEXT: ;;#ASMSTART
10312 ; GFX90A-NEXT: ; def s[8:13]
10313 ; GFX90A-NEXT: ;;#ASMEND
10314 ; GFX90A-NEXT: ;;#ASMSTART
10315 ; GFX90A-NEXT: ; def s[4:9]
10316 ; GFX90A-NEXT: ;;#ASMEND
10317 ; GFX90A-NEXT: s_mov_b32 s12, s10
10318 ; GFX90A-NEXT: s_mov_b32 s13, s11
10319 ; GFX90A-NEXT: s_mov_b32 s14, s10
10320 ; GFX90A-NEXT: s_mov_b32 s15, s11
10321 ; GFX90A-NEXT: ;;#ASMSTART
10322 ; GFX90A-NEXT: ; use s[8:15]
10323 ; GFX90A-NEXT: ;;#ASMEND
10324 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10326 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_1_1:
10328 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10329 ; GFX940-NEXT: ;;#ASMSTART
10330 ; GFX940-NEXT: ; def s[8:13]
10331 ; GFX940-NEXT: ;;#ASMEND
10332 ; GFX940-NEXT: ;;#ASMSTART
10333 ; GFX940-NEXT: ; def s[0:5]
10334 ; GFX940-NEXT: ;;#ASMEND
10335 ; GFX940-NEXT: s_mov_b32 s8, s4
10336 ; GFX940-NEXT: s_mov_b32 s9, s5
10337 ; GFX940-NEXT: s_mov_b32 s12, s10
10338 ; GFX940-NEXT: s_mov_b32 s13, s11
10339 ; GFX940-NEXT: s_mov_b32 s14, s10
10340 ; GFX940-NEXT: s_mov_b32 s15, s11
10341 ; GFX940-NEXT: ;;#ASMSTART
10342 ; GFX940-NEXT: ; use s[8:15]
10343 ; GFX940-NEXT: ;;#ASMEND
10344 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from "=s" inline asm so they live in SGPRs.
10345 %vec0 = call <3 x i64> asm "; def $0", "=s"()
10346 %vec1 = call <3 x i64> asm "; def $0", "=s"()
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the result is
; <%vec1[2], %vec0[1], %vec0[1], %vec0[1]>.
10347 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
; The use constraint forces the shuffled value into s[8:15].
10348 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test: builds a <4 x i64> from two inline-asm-defined <3 x i64>
; values with mask <5, poison, 1, 1> and hands it to an asm use pinned to
; s[8:15]. Expected llc output is recorded separately for gfx900, gfx90a and
; gfx940.
10352 define void @s_shuffle_v4i64_v3i64__5_u_1_1() {
10353 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_1_1:
10355 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10356 ; GFX900-NEXT: ;;#ASMSTART
10357 ; GFX900-NEXT: ; def s[4:9]
10358 ; GFX900-NEXT: ;;#ASMEND
10359 ; GFX900-NEXT: ;;#ASMSTART
10360 ; GFX900-NEXT: ; def s[8:13]
10361 ; GFX900-NEXT: ;;#ASMEND
10362 ; GFX900-NEXT: s_mov_b32 s8, s12
10363 ; GFX900-NEXT: s_mov_b32 s9, s13
10364 ; GFX900-NEXT: s_mov_b32 s12, s6
10365 ; GFX900-NEXT: s_mov_b32 s13, s7
10366 ; GFX900-NEXT: s_mov_b32 s14, s6
10367 ; GFX900-NEXT: s_mov_b32 s15, s7
10368 ; GFX900-NEXT: ;;#ASMSTART
10369 ; GFX900-NEXT: ; use s[8:15]
10370 ; GFX900-NEXT: ;;#ASMEND
10371 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10373 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_1_1:
10375 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10376 ; GFX90A-NEXT: ;;#ASMSTART
10377 ; GFX90A-NEXT: ; def s[4:9]
10378 ; GFX90A-NEXT: ;;#ASMEND
10379 ; GFX90A-NEXT: ;;#ASMSTART
10380 ; GFX90A-NEXT: ; def s[8:13]
10381 ; GFX90A-NEXT: ;;#ASMEND
10382 ; GFX90A-NEXT: s_mov_b32 s8, s12
10383 ; GFX90A-NEXT: s_mov_b32 s9, s13
10384 ; GFX90A-NEXT: s_mov_b32 s12, s6
10385 ; GFX90A-NEXT: s_mov_b32 s13, s7
10386 ; GFX90A-NEXT: s_mov_b32 s14, s6
10387 ; GFX90A-NEXT: s_mov_b32 s15, s7
10388 ; GFX90A-NEXT: ;;#ASMSTART
10389 ; GFX90A-NEXT: ; use s[8:15]
10390 ; GFX90A-NEXT: ;;#ASMEND
10391 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10393 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_1_1:
10395 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10396 ; GFX940-NEXT: ;;#ASMSTART
10397 ; GFX940-NEXT: ; def s[0:5]
10398 ; GFX940-NEXT: ;;#ASMEND
10399 ; GFX940-NEXT: s_mov_b32 s12, s2
10400 ; GFX940-NEXT: ;;#ASMSTART
10401 ; GFX940-NEXT: ; def s[4:9]
10402 ; GFX940-NEXT: ;;#ASMEND
10403 ; GFX940-NEXT: s_mov_b32 s13, s3
10404 ; GFX940-NEXT: s_mov_b32 s14, s2
10405 ; GFX940-NEXT: s_mov_b32 s15, s3
10406 ; GFX940-NEXT: ;;#ASMSTART
10407 ; GFX940-NEXT: ; use s[8:15]
10408 ; GFX940-NEXT: ;;#ASMEND
10409 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from "=s" inline asm so they live in SGPRs.
10410 %vec0 = call <3 x i64> asm "; def $0", "=s"()
10411 %vec1 = call <3 x i64> asm "; def $0", "=s"()
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the result is
; <%vec1[2], poison, %vec0[1], %vec0[1]>.
10412 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 1, i32 1>
; The use constraint forces the shuffled value into s[8:15].
10413 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test: builds a <4 x i64> from two inline-asm-defined <3 x i64>
; values with mask <5, 0, 1, 1> and hands it to an asm use pinned to s[8:15].
; Expected llc output is recorded separately for gfx900, gfx90a and gfx940.
10417 define void @s_shuffle_v4i64_v3i64__5_0_1_1() {
10418 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_1_1:
10420 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10421 ; GFX900-NEXT: ;;#ASMSTART
10422 ; GFX900-NEXT: ; def s[4:9]
10423 ; GFX900-NEXT: ;;#ASMEND
10424 ; GFX900-NEXT: ;;#ASMSTART
10425 ; GFX900-NEXT: ; def s[8:13]
10426 ; GFX900-NEXT: ;;#ASMEND
10427 ; GFX900-NEXT: s_mov_b32 s8, s12
10428 ; GFX900-NEXT: s_mov_b32 s9, s13
10429 ; GFX900-NEXT: s_mov_b32 s10, s4
10430 ; GFX900-NEXT: s_mov_b32 s11, s5
10431 ; GFX900-NEXT: s_mov_b32 s12, s6
10432 ; GFX900-NEXT: s_mov_b32 s13, s7
10433 ; GFX900-NEXT: s_mov_b32 s14, s6
10434 ; GFX900-NEXT: s_mov_b32 s15, s7
10435 ; GFX900-NEXT: ;;#ASMSTART
10436 ; GFX900-NEXT: ; use s[8:15]
10437 ; GFX900-NEXT: ;;#ASMEND
10438 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10440 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_1_1:
10442 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10443 ; GFX90A-NEXT: ;;#ASMSTART
10444 ; GFX90A-NEXT: ; def s[4:9]
10445 ; GFX90A-NEXT: ;;#ASMEND
10446 ; GFX90A-NEXT: ;;#ASMSTART
10447 ; GFX90A-NEXT: ; def s[8:13]
10448 ; GFX90A-NEXT: ;;#ASMEND
10449 ; GFX90A-NEXT: s_mov_b32 s8, s12
10450 ; GFX90A-NEXT: s_mov_b32 s9, s13
10451 ; GFX90A-NEXT: s_mov_b32 s10, s4
10452 ; GFX90A-NEXT: s_mov_b32 s11, s5
10453 ; GFX90A-NEXT: s_mov_b32 s12, s6
10454 ; GFX90A-NEXT: s_mov_b32 s13, s7
10455 ; GFX90A-NEXT: s_mov_b32 s14, s6
10456 ; GFX90A-NEXT: s_mov_b32 s15, s7
10457 ; GFX90A-NEXT: ;;#ASMSTART
10458 ; GFX90A-NEXT: ; use s[8:15]
10459 ; GFX90A-NEXT: ;;#ASMEND
10460 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10462 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_1_1:
10464 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10465 ; GFX940-NEXT: ;;#ASMSTART
10466 ; GFX940-NEXT: ; def s[0:5]
10467 ; GFX940-NEXT: ;;#ASMEND
10468 ; GFX940-NEXT: s_mov_b32 s10, s0
10469 ; GFX940-NEXT: ;;#ASMSTART
10470 ; GFX940-NEXT: ; def s[4:9]
10471 ; GFX940-NEXT: ;;#ASMEND
10472 ; GFX940-NEXT: s_mov_b32 s11, s1
10473 ; GFX940-NEXT: s_mov_b32 s12, s2
10474 ; GFX940-NEXT: s_mov_b32 s13, s3
10475 ; GFX940-NEXT: s_mov_b32 s14, s2
10476 ; GFX940-NEXT: s_mov_b32 s15, s3
10477 ; GFX940-NEXT: ;;#ASMSTART
10478 ; GFX940-NEXT: ; use s[8:15]
10479 ; GFX940-NEXT: ;;#ASMEND
10480 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from "=s" inline asm so they live in SGPRs.
10481 %vec0 = call <3 x i64> asm "; def $0", "=s"()
10482 %vec1 = call <3 x i64> asm "; def $0", "=s"()
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the result is
; <%vec1[2], %vec0[0], %vec0[1], %vec0[1]>.
10483 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 1, i32 1>
; The use constraint forces the shuffled value into s[8:15].
10484 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test: builds a <4 x i64> from two inline-asm-defined <3 x i64>
; values with mask <5, 2, 1, 1> and hands it to an asm use pinned to s[8:15].
; Expected llc output is recorded separately for gfx900, gfx90a and gfx940.
10488 define void @s_shuffle_v4i64_v3i64__5_2_1_1() {
10489 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_1_1:
10491 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10492 ; GFX900-NEXT: ;;#ASMSTART
10493 ; GFX900-NEXT: ; def s[12:17]
10494 ; GFX900-NEXT: ;;#ASMEND
10495 ; GFX900-NEXT: ;;#ASMSTART
10496 ; GFX900-NEXT: ; def s[4:9]
10497 ; GFX900-NEXT: ;;#ASMEND
10498 ; GFX900-NEXT: s_mov_b32 s10, s16
10499 ; GFX900-NEXT: s_mov_b32 s11, s17
10500 ; GFX900-NEXT: s_mov_b32 s12, s14
10501 ; GFX900-NEXT: s_mov_b32 s13, s15
10502 ; GFX900-NEXT: ;;#ASMSTART
10503 ; GFX900-NEXT: ; use s[8:15]
10504 ; GFX900-NEXT: ;;#ASMEND
10505 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10507 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_1_1:
10509 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10510 ; GFX90A-NEXT: ;;#ASMSTART
10511 ; GFX90A-NEXT: ; def s[12:17]
10512 ; GFX90A-NEXT: ;;#ASMEND
10513 ; GFX90A-NEXT: ;;#ASMSTART
10514 ; GFX90A-NEXT: ; def s[4:9]
10515 ; GFX90A-NEXT: ;;#ASMEND
10516 ; GFX90A-NEXT: s_mov_b32 s10, s16
10517 ; GFX90A-NEXT: s_mov_b32 s11, s17
10518 ; GFX90A-NEXT: s_mov_b32 s12, s14
10519 ; GFX90A-NEXT: s_mov_b32 s13, s15
10520 ; GFX90A-NEXT: ;;#ASMSTART
10521 ; GFX90A-NEXT: ; use s[8:15]
10522 ; GFX90A-NEXT: ;;#ASMEND
10523 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10525 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_1_1:
10527 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10528 ; GFX940-NEXT: ;;#ASMSTART
10529 ; GFX940-NEXT: ; def s[8:13]
10530 ; GFX940-NEXT: ;;#ASMEND
10531 ; GFX940-NEXT: ;;#ASMSTART
10532 ; GFX940-NEXT: ; def s[0:5]
10533 ; GFX940-NEXT: ;;#ASMEND
10534 ; GFX940-NEXT: s_mov_b32 s8, s12
10535 ; GFX940-NEXT: s_mov_b32 s9, s13
10536 ; GFX940-NEXT: s_mov_b32 s10, s4
10537 ; GFX940-NEXT: s_mov_b32 s11, s5
10538 ; GFX940-NEXT: s_mov_b32 s12, s2
10539 ; GFX940-NEXT: s_mov_b32 s13, s3
10540 ; GFX940-NEXT: s_mov_b32 s14, s2
10541 ; GFX940-NEXT: s_mov_b32 s15, s3
10542 ; GFX940-NEXT: ;;#ASMSTART
10543 ; GFX940-NEXT: ; use s[8:15]
10544 ; GFX940-NEXT: ;;#ASMEND
10545 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from "=s" inline asm so they live in SGPRs.
10546 %vec0 = call <3 x i64> asm "; def $0", "=s"()
10547 %vec1 = call <3 x i64> asm "; def $0", "=s"()
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the result is
; <%vec1[2], %vec0[2], %vec0[1], %vec0[1]>.
10548 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 1, i32 1>
; The use constraint forces the shuffled value into s[8:15].
10549 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test: builds a <4 x i64> from two inline-asm-defined <3 x i64>
; values with mask <5, 3, 1, 1> and hands it to an asm use pinned to s[8:15].
; Expected llc output is recorded separately for gfx900, gfx90a and gfx940.
10553 define void @s_shuffle_v4i64_v3i64__5_3_1_1() {
10554 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_1_1:
10556 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10557 ; GFX900-NEXT: ;;#ASMSTART
10558 ; GFX900-NEXT: ; def s[4:9]
10559 ; GFX900-NEXT: ;;#ASMEND
10560 ; GFX900-NEXT: ;;#ASMSTART
10561 ; GFX900-NEXT: ; def s[12:17]
10562 ; GFX900-NEXT: ;;#ASMEND
10563 ; GFX900-NEXT: s_mov_b32 s8, s16
10564 ; GFX900-NEXT: s_mov_b32 s9, s17
10565 ; GFX900-NEXT: s_mov_b32 s10, s12
10566 ; GFX900-NEXT: s_mov_b32 s11, s13
10567 ; GFX900-NEXT: s_mov_b32 s12, s6
10568 ; GFX900-NEXT: s_mov_b32 s13, s7
10569 ; GFX900-NEXT: s_mov_b32 s14, s6
10570 ; GFX900-NEXT: s_mov_b32 s15, s7
10571 ; GFX900-NEXT: ;;#ASMSTART
10572 ; GFX900-NEXT: ; use s[8:15]
10573 ; GFX900-NEXT: ;;#ASMEND
10574 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10576 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_1_1:
10578 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10579 ; GFX90A-NEXT: ;;#ASMSTART
10580 ; GFX90A-NEXT: ; def s[4:9]
10581 ; GFX90A-NEXT: ;;#ASMEND
10582 ; GFX90A-NEXT: ;;#ASMSTART
10583 ; GFX90A-NEXT: ; def s[12:17]
10584 ; GFX90A-NEXT: ;;#ASMEND
10585 ; GFX90A-NEXT: s_mov_b32 s8, s16
10586 ; GFX90A-NEXT: s_mov_b32 s9, s17
10587 ; GFX90A-NEXT: s_mov_b32 s10, s12
10588 ; GFX90A-NEXT: s_mov_b32 s11, s13
10589 ; GFX90A-NEXT: s_mov_b32 s12, s6
10590 ; GFX90A-NEXT: s_mov_b32 s13, s7
10591 ; GFX90A-NEXT: s_mov_b32 s14, s6
10592 ; GFX90A-NEXT: s_mov_b32 s15, s7
10593 ; GFX90A-NEXT: ;;#ASMSTART
10594 ; GFX90A-NEXT: ; use s[8:15]
10595 ; GFX90A-NEXT: ;;#ASMEND
10596 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10598 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_1_1:
10600 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10601 ; GFX940-NEXT: ;;#ASMSTART
10602 ; GFX940-NEXT: ; def s[0:5]
10603 ; GFX940-NEXT: ;;#ASMEND
10604 ; GFX940-NEXT: s_mov_b32 s12, s2
10605 ; GFX940-NEXT: ;;#ASMSTART
10606 ; GFX940-NEXT: ; def s[4:9]
10607 ; GFX940-NEXT: ;;#ASMEND
10608 ; GFX940-NEXT: s_mov_b32 s10, s4
10609 ; GFX940-NEXT: s_mov_b32 s11, s5
10610 ; GFX940-NEXT: s_mov_b32 s13, s3
10611 ; GFX940-NEXT: s_mov_b32 s14, s2
10612 ; GFX940-NEXT: s_mov_b32 s15, s3
10613 ; GFX940-NEXT: ;;#ASMSTART
10614 ; GFX940-NEXT: ; use s[8:15]
10615 ; GFX940-NEXT: ;;#ASMEND
10616 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from "=s" inline asm so they live in SGPRs.
10617 %vec0 = call <3 x i64> asm "; def $0", "=s"()
10618 %vec1 = call <3 x i64> asm "; def $0", "=s"()
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the result is
; <%vec1[2], %vec1[0], %vec0[1], %vec0[1]>.
10619 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 1, i32 1>
; The use constraint forces the shuffled value into s[8:15].
10620 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test: builds a <4 x i64> from two inline-asm-defined <3 x i64>
; values with mask <5, 4, 1, 1> and hands it to an asm use pinned to s[8:15].
; Expected llc output is recorded separately for gfx900, gfx90a and gfx940.
10624 define void @s_shuffle_v4i64_v3i64__5_4_1_1() {
10625 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_1_1:
10627 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10628 ; GFX900-NEXT: ;;#ASMSTART
10629 ; GFX900-NEXT: ; def s[4:9]
10630 ; GFX900-NEXT: ;;#ASMEND
10631 ; GFX900-NEXT: ;;#ASMSTART
10632 ; GFX900-NEXT: ; def s[8:13]
10633 ; GFX900-NEXT: ;;#ASMEND
10634 ; GFX900-NEXT: s_mov_b32 s8, s12
10635 ; GFX900-NEXT: s_mov_b32 s9, s13
10636 ; GFX900-NEXT: s_mov_b32 s12, s6
10637 ; GFX900-NEXT: s_mov_b32 s13, s7
10638 ; GFX900-NEXT: s_mov_b32 s14, s6
10639 ; GFX900-NEXT: s_mov_b32 s15, s7
10640 ; GFX900-NEXT: ;;#ASMSTART
10641 ; GFX900-NEXT: ; use s[8:15]
10642 ; GFX900-NEXT: ;;#ASMEND
10643 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10645 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_1_1:
10647 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10648 ; GFX90A-NEXT: ;;#ASMSTART
10649 ; GFX90A-NEXT: ; def s[4:9]
10650 ; GFX90A-NEXT: ;;#ASMEND
10651 ; GFX90A-NEXT: ;;#ASMSTART
10652 ; GFX90A-NEXT: ; def s[8:13]
10653 ; GFX90A-NEXT: ;;#ASMEND
10654 ; GFX90A-NEXT: s_mov_b32 s8, s12
10655 ; GFX90A-NEXT: s_mov_b32 s9, s13
10656 ; GFX90A-NEXT: s_mov_b32 s12, s6
10657 ; GFX90A-NEXT: s_mov_b32 s13, s7
10658 ; GFX90A-NEXT: s_mov_b32 s14, s6
10659 ; GFX90A-NEXT: s_mov_b32 s15, s7
10660 ; GFX90A-NEXT: ;;#ASMSTART
10661 ; GFX90A-NEXT: ; use s[8:15]
10662 ; GFX90A-NEXT: ;;#ASMEND
10663 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10665 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_4_1_1:
10667 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10668 ; GFX940-NEXT: ;;#ASMSTART
10669 ; GFX940-NEXT: ; def s[8:13]
10670 ; GFX940-NEXT: ;;#ASMEND
10671 ; GFX940-NEXT: ;;#ASMSTART
10672 ; GFX940-NEXT: ; def s[0:5]
10673 ; GFX940-NEXT: ;;#ASMEND
10674 ; GFX940-NEXT: s_mov_b32 s8, s12
10675 ; GFX940-NEXT: s_mov_b32 s9, s13
10676 ; GFX940-NEXT: s_mov_b32 s12, s2
10677 ; GFX940-NEXT: s_mov_b32 s13, s3
10678 ; GFX940-NEXT: s_mov_b32 s14, s2
10679 ; GFX940-NEXT: s_mov_b32 s15, s3
10680 ; GFX940-NEXT: ;;#ASMSTART
10681 ; GFX940-NEXT: ; use s[8:15]
10682 ; GFX940-NEXT: ;;#ASMEND
10683 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from "=s" inline asm so they live in SGPRs.
10684 %vec0 = call <3 x i64> asm "; def $0", "=s"()
10685 %vec1 = call <3 x i64> asm "; def $0", "=s"()
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the result is
; <%vec1[2], %vec1[1], %vec0[1], %vec0[1]>.
10686 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 1, i32 1>
; The use constraint forces the shuffled value into s[8:15].
10687 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test: builds a <4 x i64> from two inline-asm-defined <3 x i64>
; values with mask <5, 5, 1, 1> and hands it to an asm use pinned to s[8:15].
; Expected llc output is recorded separately for gfx900, gfx90a and gfx940.
10691 define void @s_shuffle_v4i64_v3i64__5_5_1_1() {
10692 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_1:
10694 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10695 ; GFX900-NEXT: ;;#ASMSTART
10696 ; GFX900-NEXT: ; def s[4:9]
10697 ; GFX900-NEXT: ;;#ASMEND
10698 ; GFX900-NEXT: ;;#ASMSTART
10699 ; GFX900-NEXT: ; def s[8:13]
10700 ; GFX900-NEXT: ;;#ASMEND
10701 ; GFX900-NEXT: s_mov_b32 s8, s12
10702 ; GFX900-NEXT: s_mov_b32 s9, s13
10703 ; GFX900-NEXT: s_mov_b32 s10, s12
10704 ; GFX900-NEXT: s_mov_b32 s11, s13
10705 ; GFX900-NEXT: s_mov_b32 s12, s6
10706 ; GFX900-NEXT: s_mov_b32 s13, s7
10707 ; GFX900-NEXT: s_mov_b32 s14, s6
10708 ; GFX900-NEXT: s_mov_b32 s15, s7
10709 ; GFX900-NEXT: ;;#ASMSTART
10710 ; GFX900-NEXT: ; use s[8:15]
10711 ; GFX900-NEXT: ;;#ASMEND
10712 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10714 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_1:
10716 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10717 ; GFX90A-NEXT: ;;#ASMSTART
10718 ; GFX90A-NEXT: ; def s[4:9]
10719 ; GFX90A-NEXT: ;;#ASMEND
10720 ; GFX90A-NEXT: ;;#ASMSTART
10721 ; GFX90A-NEXT: ; def s[8:13]
10722 ; GFX90A-NEXT: ;;#ASMEND
10723 ; GFX90A-NEXT: s_mov_b32 s8, s12
10724 ; GFX90A-NEXT: s_mov_b32 s9, s13
10725 ; GFX90A-NEXT: s_mov_b32 s10, s12
10726 ; GFX90A-NEXT: s_mov_b32 s11, s13
10727 ; GFX90A-NEXT: s_mov_b32 s12, s6
10728 ; GFX90A-NEXT: s_mov_b32 s13, s7
10729 ; GFX90A-NEXT: s_mov_b32 s14, s6
10730 ; GFX90A-NEXT: s_mov_b32 s15, s7
10731 ; GFX90A-NEXT: ;;#ASMSTART
10732 ; GFX90A-NEXT: ; use s[8:15]
10733 ; GFX90A-NEXT: ;;#ASMEND
10734 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10736 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_1:
10738 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10739 ; GFX940-NEXT: ;;#ASMSTART
10740 ; GFX940-NEXT: ; def s[8:13]
10741 ; GFX940-NEXT: ;;#ASMEND
10742 ; GFX940-NEXT: ;;#ASMSTART
10743 ; GFX940-NEXT: ; def s[0:5]
10744 ; GFX940-NEXT: ;;#ASMEND
10745 ; GFX940-NEXT: s_mov_b32 s8, s12
10746 ; GFX940-NEXT: s_mov_b32 s9, s13
10747 ; GFX940-NEXT: s_mov_b32 s10, s12
10748 ; GFX940-NEXT: s_mov_b32 s11, s13
10749 ; GFX940-NEXT: s_mov_b32 s12, s2
10750 ; GFX940-NEXT: s_mov_b32 s13, s3
10751 ; GFX940-NEXT: s_mov_b32 s14, s2
10752 ; GFX940-NEXT: s_mov_b32 s15, s3
10753 ; GFX940-NEXT: ;;#ASMSTART
10754 ; GFX940-NEXT: ; use s[8:15]
10755 ; GFX940-NEXT: ;;#ASMEND
10756 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from "=s" inline asm so they live in SGPRs.
10757 %vec0 = call <3 x i64> asm "; def $0", "=s"()
10758 %vec1 = call <3 x i64> asm "; def $0", "=s"()
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the result is
; <%vec1[2], %vec1[2], %vec0[1], %vec0[1]>.
10759 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 1>
; The use constraint forces the shuffled value into s[8:15].
10760 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test: builds a <4 x i64> from two inline-asm-defined <3 x i64>
; values with mask <5, 5, poison, 1> and hands it to an asm use pinned to
; s[8:15]. Expected llc output is recorded separately for gfx900, gfx90a and
; gfx940; none of the three sequences writes s[12:13].
10764 define void @s_shuffle_v4i64_v3i64__5_5_u_1() {
10765 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_1:
10767 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10768 ; GFX900-NEXT: ;;#ASMSTART
10769 ; GFX900-NEXT: ; def s[4:9]
10770 ; GFX900-NEXT: ;;#ASMEND
10771 ; GFX900-NEXT: ;;#ASMSTART
10772 ; GFX900-NEXT: ; def s[8:13]
10773 ; GFX900-NEXT: ;;#ASMEND
10774 ; GFX900-NEXT: s_mov_b32 s8, s12
10775 ; GFX900-NEXT: s_mov_b32 s9, s13
10776 ; GFX900-NEXT: s_mov_b32 s10, s12
10777 ; GFX900-NEXT: s_mov_b32 s11, s13
10778 ; GFX900-NEXT: s_mov_b32 s14, s6
10779 ; GFX900-NEXT: s_mov_b32 s15, s7
10780 ; GFX900-NEXT: ;;#ASMSTART
10781 ; GFX900-NEXT: ; use s[8:15]
10782 ; GFX900-NEXT: ;;#ASMEND
10783 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10785 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_1:
10787 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10788 ; GFX90A-NEXT: ;;#ASMSTART
10789 ; GFX90A-NEXT: ; def s[4:9]
10790 ; GFX90A-NEXT: ;;#ASMEND
10791 ; GFX90A-NEXT: ;;#ASMSTART
10792 ; GFX90A-NEXT: ; def s[8:13]
10793 ; GFX90A-NEXT: ;;#ASMEND
10794 ; GFX90A-NEXT: s_mov_b32 s8, s12
10795 ; GFX90A-NEXT: s_mov_b32 s9, s13
10796 ; GFX90A-NEXT: s_mov_b32 s10, s12
10797 ; GFX90A-NEXT: s_mov_b32 s11, s13
10798 ; GFX90A-NEXT: s_mov_b32 s14, s6
10799 ; GFX90A-NEXT: s_mov_b32 s15, s7
10800 ; GFX90A-NEXT: ;;#ASMSTART
10801 ; GFX90A-NEXT: ; use s[8:15]
10802 ; GFX90A-NEXT: ;;#ASMEND
10803 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10805 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_1:
10807 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10808 ; GFX940-NEXT: ;;#ASMSTART
10809 ; GFX940-NEXT: ; def s[8:13]
10810 ; GFX940-NEXT: ;;#ASMEND
10811 ; GFX940-NEXT: ;;#ASMSTART
10812 ; GFX940-NEXT: ; def s[0:5]
10813 ; GFX940-NEXT: ;;#ASMEND
10814 ; GFX940-NEXT: s_mov_b32 s8, s12
10815 ; GFX940-NEXT: s_mov_b32 s9, s13
10816 ; GFX940-NEXT: s_mov_b32 s10, s12
10817 ; GFX940-NEXT: s_mov_b32 s11, s13
10818 ; GFX940-NEXT: s_mov_b32 s14, s2
10819 ; GFX940-NEXT: s_mov_b32 s15, s3
10820 ; GFX940-NEXT: ;;#ASMSTART
10821 ; GFX940-NEXT: ; use s[8:15]
10822 ; GFX940-NEXT: ;;#ASMEND
10823 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from "=s" inline asm so they live in SGPRs.
10824 %vec0 = call <3 x i64> asm "; def $0", "=s"()
10825 %vec1 = call <3 x i64> asm "; def $0", "=s"()
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the result is
; <%vec1[2], %vec1[2], poison, %vec0[1]>.
10826 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 1>
; The use constraint forces the shuffled value into s[8:15].
10827 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle test: builds a <4 x i64> from two inline-asm-defined <3 x i64>
; values with mask <5, 5, 0, 1> and hands it to an asm use pinned to s[8:15].
; Expected llc output is recorded separately for gfx900, gfx90a and gfx940.
10831 define void @s_shuffle_v4i64_v3i64__5_5_0_1() {
10832 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_1:
10834 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10835 ; GFX900-NEXT: ;;#ASMSTART
10836 ; GFX900-NEXT: ; def s[4:9]
10837 ; GFX900-NEXT: ;;#ASMEND
10838 ; GFX900-NEXT: ;;#ASMSTART
10839 ; GFX900-NEXT: ; def s[8:13]
10840 ; GFX900-NEXT: ;;#ASMEND
10841 ; GFX900-NEXT: s_mov_b32 s8, s12
10842 ; GFX900-NEXT: s_mov_b32 s9, s13
10843 ; GFX900-NEXT: s_mov_b32 s10, s12
10844 ; GFX900-NEXT: s_mov_b32 s11, s13
10845 ; GFX900-NEXT: s_mov_b32 s12, s4
10846 ; GFX900-NEXT: s_mov_b32 s13, s5
10847 ; GFX900-NEXT: s_mov_b32 s14, s6
10848 ; GFX900-NEXT: s_mov_b32 s15, s7
10849 ; GFX900-NEXT: ;;#ASMSTART
10850 ; GFX900-NEXT: ; use s[8:15]
10851 ; GFX900-NEXT: ;;#ASMEND
10852 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10854 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_1:
10856 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10857 ; GFX90A-NEXT: ;;#ASMSTART
10858 ; GFX90A-NEXT: ; def s[4:9]
10859 ; GFX90A-NEXT: ;;#ASMEND
10860 ; GFX90A-NEXT: ;;#ASMSTART
10861 ; GFX90A-NEXT: ; def s[8:13]
10862 ; GFX90A-NEXT: ;;#ASMEND
10863 ; GFX90A-NEXT: s_mov_b32 s8, s12
10864 ; GFX90A-NEXT: s_mov_b32 s9, s13
10865 ; GFX90A-NEXT: s_mov_b32 s10, s12
10866 ; GFX90A-NEXT: s_mov_b32 s11, s13
10867 ; GFX90A-NEXT: s_mov_b32 s12, s4
10868 ; GFX90A-NEXT: s_mov_b32 s13, s5
10869 ; GFX90A-NEXT: s_mov_b32 s14, s6
10870 ; GFX90A-NEXT: s_mov_b32 s15, s7
10871 ; GFX90A-NEXT: ;;#ASMSTART
10872 ; GFX90A-NEXT: ; use s[8:15]
10873 ; GFX90A-NEXT: ;;#ASMEND
10874 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10876 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_1:
10878 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10879 ; GFX940-NEXT: ;;#ASMSTART
10880 ; GFX940-NEXT: ; def s[8:13]
10881 ; GFX940-NEXT: ;;#ASMEND
10882 ; GFX940-NEXT: ;;#ASMSTART
10883 ; GFX940-NEXT: ; def s[0:5]
10884 ; GFX940-NEXT: ;;#ASMEND
10885 ; GFX940-NEXT: s_mov_b32 s8, s12
10886 ; GFX940-NEXT: s_mov_b32 s9, s13
10887 ; GFX940-NEXT: s_mov_b32 s10, s12
10888 ; GFX940-NEXT: s_mov_b32 s11, s13
10889 ; GFX940-NEXT: s_mov_b32 s12, s0
10890 ; GFX940-NEXT: s_mov_b32 s13, s1
10891 ; GFX940-NEXT: s_mov_b32 s14, s2
10892 ; GFX940-NEXT: s_mov_b32 s15, s3
10893 ; GFX940-NEXT: ;;#ASMSTART
10894 ; GFX940-NEXT: ; use s[8:15]
10895 ; GFX940-NEXT: ;;#ASMEND
10896 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from "=s" inline asm so they live in SGPRs.
10897 %vec0 = call <3 x i64> asm "; def $0", "=s"()
10898 %vec1 = call <3 x i64> asm "; def $0", "=s"()
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the result is
; <%vec1[2], %vec1[2], %vec0[0], %vec0[1]>.
10899 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 1>
; The use constraint forces the shuffled value into s[8:15].
10900 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle test with mask <5, 5, 2, 1>. Mask indices 0-2 select
; from %vec0 and 3-5 from %vec1, so lanes 0 and 1 both take element 2 of %vec1,
; lane 2 takes element 2 of %vec0 and lane 3 takes element 1 of %vec0. Both
; inputs come from inline asm with an "=s" constraint and the <4 x i64> result
; is consumed by an inline asm operand pinned to s[8:15]. The assertions that
; follow are autogenerated (see the NOTE on the first line of the file).
10904 define void @s_shuffle_v4i64_v3i64__5_5_2_1() {
10905 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_1:
10907 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10908 ; GFX900-NEXT: ;;#ASMSTART
10909 ; GFX900-NEXT: ; def s[12:17]
10910 ; GFX900-NEXT: ;;#ASMEND
10911 ; GFX900-NEXT: ;;#ASMSTART
10912 ; GFX900-NEXT: ; def s[8:13]
10913 ; GFX900-NEXT: ;;#ASMEND
10914 ; GFX900-NEXT: s_mov_b32 s8, s12
10915 ; GFX900-NEXT: s_mov_b32 s9, s13
10916 ; GFX900-NEXT: s_mov_b32 s10, s12
10917 ; GFX900-NEXT: s_mov_b32 s11, s13
10918 ; GFX900-NEXT: s_mov_b32 s12, s16
10919 ; GFX900-NEXT: s_mov_b32 s13, s17
10920 ; GFX900-NEXT: ;;#ASMSTART
10921 ; GFX900-NEXT: ; use s[8:15]
10922 ; GFX900-NEXT: ;;#ASMEND
10923 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10925 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_1:
10927 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10928 ; GFX90A-NEXT: ;;#ASMSTART
10929 ; GFX90A-NEXT: ; def s[12:17]
10930 ; GFX90A-NEXT: ;;#ASMEND
10931 ; GFX90A-NEXT: ;;#ASMSTART
10932 ; GFX90A-NEXT: ; def s[8:13]
10933 ; GFX90A-NEXT: ;;#ASMEND
10934 ; GFX90A-NEXT: s_mov_b32 s8, s12
10935 ; GFX90A-NEXT: s_mov_b32 s9, s13
10936 ; GFX90A-NEXT: s_mov_b32 s10, s12
10937 ; GFX90A-NEXT: s_mov_b32 s11, s13
10938 ; GFX90A-NEXT: s_mov_b32 s12, s16
10939 ; GFX90A-NEXT: s_mov_b32 s13, s17
10940 ; GFX90A-NEXT: ;;#ASMSTART
10941 ; GFX90A-NEXT: ; use s[8:15]
10942 ; GFX90A-NEXT: ;;#ASMEND
10943 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10945 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_1:
10947 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10948 ; GFX940-NEXT: ;;#ASMSTART
10949 ; GFX940-NEXT: ; def s[8:13]
10950 ; GFX940-NEXT: ;;#ASMEND
10951 ; GFX940-NEXT: ;;#ASMSTART
10952 ; GFX940-NEXT: ; def s[0:5]
10953 ; GFX940-NEXT: ;;#ASMEND
10954 ; GFX940-NEXT: s_mov_b32 s8, s12
10955 ; GFX940-NEXT: s_mov_b32 s9, s13
10956 ; GFX940-NEXT: s_mov_b32 s10, s12
10957 ; GFX940-NEXT: s_mov_b32 s11, s13
10958 ; GFX940-NEXT: s_mov_b32 s12, s4
10959 ; GFX940-NEXT: s_mov_b32 s13, s5
10960 ; GFX940-NEXT: s_mov_b32 s14, s2
10961 ; GFX940-NEXT: s_mov_b32 s15, s3
10962 ; GFX940-NEXT: ;;#ASMSTART
10963 ; GFX940-NEXT: ; use s[8:15]
10964 ; GFX940-NEXT: ;;#ASMEND
10965 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10966 %vec0 = call <3 x i64> asm "; def $0", "=s"()
10967 %vec1 = call <3 x i64> asm "; def $0", "=s"()
10968 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 1>
10969 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle test with mask <5, 5, 3, 1>. Mask indices 0-2 select
; from %vec0 and 3-5 from %vec1, so lanes 0 and 1 both take element 2 of %vec1,
; lane 2 takes element 0 of %vec1 and lane 3 takes element 1 of %vec0. Both
; inputs come from inline asm with an "=s" constraint and the <4 x i64> result
; is consumed by an inline asm operand pinned to s[8:15]. The assertions that
; follow are autogenerated (see the NOTE on the first line of the file).
10973 define void @s_shuffle_v4i64_v3i64__5_5_3_1() {
10974 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_1:
10976 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10977 ; GFX900-NEXT: ;;#ASMSTART
10978 ; GFX900-NEXT: ; def s[4:9]
10979 ; GFX900-NEXT: ;;#ASMEND
10980 ; GFX900-NEXT: ;;#ASMSTART
10981 ; GFX900-NEXT: ; def s[12:17]
10982 ; GFX900-NEXT: ;;#ASMEND
10983 ; GFX900-NEXT: s_mov_b32 s8, s16
10984 ; GFX900-NEXT: s_mov_b32 s9, s17
10985 ; GFX900-NEXT: s_mov_b32 s10, s16
10986 ; GFX900-NEXT: s_mov_b32 s11, s17
10987 ; GFX900-NEXT: s_mov_b32 s14, s6
10988 ; GFX900-NEXT: s_mov_b32 s15, s7
10989 ; GFX900-NEXT: ;;#ASMSTART
10990 ; GFX900-NEXT: ; use s[8:15]
10991 ; GFX900-NEXT: ;;#ASMEND
10992 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10994 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_1:
10996 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10997 ; GFX90A-NEXT: ;;#ASMSTART
10998 ; GFX90A-NEXT: ; def s[4:9]
10999 ; GFX90A-NEXT: ;;#ASMEND
11000 ; GFX90A-NEXT: ;;#ASMSTART
11001 ; GFX90A-NEXT: ; def s[12:17]
11002 ; GFX90A-NEXT: ;;#ASMEND
11003 ; GFX90A-NEXT: s_mov_b32 s8, s16
11004 ; GFX90A-NEXT: s_mov_b32 s9, s17
11005 ; GFX90A-NEXT: s_mov_b32 s10, s16
11006 ; GFX90A-NEXT: s_mov_b32 s11, s17
11007 ; GFX90A-NEXT: s_mov_b32 s14, s6
11008 ; GFX90A-NEXT: s_mov_b32 s15, s7
11009 ; GFX90A-NEXT: ;;#ASMSTART
11010 ; GFX90A-NEXT: ; use s[8:15]
11011 ; GFX90A-NEXT: ;;#ASMEND
11012 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11014 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_1:
11016 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11017 ; GFX940-NEXT: ;;#ASMSTART
11018 ; GFX940-NEXT: ; def s[12:17]
11019 ; GFX940-NEXT: ;;#ASMEND
11020 ; GFX940-NEXT: ;;#ASMSTART
11021 ; GFX940-NEXT: ; def s[0:5]
11022 ; GFX940-NEXT: ;;#ASMEND
11023 ; GFX940-NEXT: s_mov_b32 s8, s16
11024 ; GFX940-NEXT: s_mov_b32 s9, s17
11025 ; GFX940-NEXT: s_mov_b32 s10, s16
11026 ; GFX940-NEXT: s_mov_b32 s11, s17
11027 ; GFX940-NEXT: s_mov_b32 s14, s2
11028 ; GFX940-NEXT: s_mov_b32 s15, s3
11029 ; GFX940-NEXT: ;;#ASMSTART
11030 ; GFX940-NEXT: ; use s[8:15]
11031 ; GFX940-NEXT: ;;#ASMEND
11032 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11033 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11034 %vec1 = call <3 x i64> asm "; def $0", "=s"()
11035 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 1>
11036 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle test with mask <5, 5, 4, 1>. Mask indices 0-2 select
; from %vec0 and 3-5 from %vec1, so lanes 0 and 1 both take element 2 of %vec1,
; lane 2 takes element 1 of %vec1 and lane 3 takes element 1 of %vec0. Both
; inputs come from inline asm with an "=s" constraint and the <4 x i64> result
; is consumed by an inline asm operand pinned to s[8:15]. The assertions that
; follow are autogenerated (see the NOTE on the first line of the file).
11040 define void @s_shuffle_v4i64_v3i64__5_5_4_1() {
11041 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_1:
11043 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11044 ; GFX900-NEXT: ;;#ASMSTART
11045 ; GFX900-NEXT: ; def s[4:9]
11046 ; GFX900-NEXT: ;;#ASMEND
11047 ; GFX900-NEXT: ;;#ASMSTART
11048 ; GFX900-NEXT: ; def s[12:17]
11049 ; GFX900-NEXT: ;;#ASMEND
11050 ; GFX900-NEXT: s_mov_b32 s8, s16
11051 ; GFX900-NEXT: s_mov_b32 s9, s17
11052 ; GFX900-NEXT: s_mov_b32 s10, s16
11053 ; GFX900-NEXT: s_mov_b32 s11, s17
11054 ; GFX900-NEXT: s_mov_b32 s12, s14
11055 ; GFX900-NEXT: s_mov_b32 s13, s15
11056 ; GFX900-NEXT: s_mov_b32 s14, s6
11057 ; GFX900-NEXT: s_mov_b32 s15, s7
11058 ; GFX900-NEXT: ;;#ASMSTART
11059 ; GFX900-NEXT: ; use s[8:15]
11060 ; GFX900-NEXT: ;;#ASMEND
11061 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11063 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_1:
11065 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11066 ; GFX90A-NEXT: ;;#ASMSTART
11067 ; GFX90A-NEXT: ; def s[4:9]
11068 ; GFX90A-NEXT: ;;#ASMEND
11069 ; GFX90A-NEXT: ;;#ASMSTART
11070 ; GFX90A-NEXT: ; def s[12:17]
11071 ; GFX90A-NEXT: ;;#ASMEND
11072 ; GFX90A-NEXT: s_mov_b32 s8, s16
11073 ; GFX90A-NEXT: s_mov_b32 s9, s17
11074 ; GFX90A-NEXT: s_mov_b32 s10, s16
11075 ; GFX90A-NEXT: s_mov_b32 s11, s17
11076 ; GFX90A-NEXT: s_mov_b32 s12, s14
11077 ; GFX90A-NEXT: s_mov_b32 s13, s15
11078 ; GFX90A-NEXT: s_mov_b32 s14, s6
11079 ; GFX90A-NEXT: s_mov_b32 s15, s7
11080 ; GFX90A-NEXT: ;;#ASMSTART
11081 ; GFX90A-NEXT: ; use s[8:15]
11082 ; GFX90A-NEXT: ;;#ASMEND
11083 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11085 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_1:
11087 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11088 ; GFX940-NEXT: ;;#ASMSTART
11089 ; GFX940-NEXT: ; def s[12:17]
11090 ; GFX940-NEXT: ;;#ASMEND
11091 ; GFX940-NEXT: ;;#ASMSTART
11092 ; GFX940-NEXT: ; def s[0:5]
11093 ; GFX940-NEXT: ;;#ASMEND
11094 ; GFX940-NEXT: s_mov_b32 s8, s16
11095 ; GFX940-NEXT: s_mov_b32 s9, s17
11096 ; GFX940-NEXT: s_mov_b32 s10, s16
11097 ; GFX940-NEXT: s_mov_b32 s11, s17
11098 ; GFX940-NEXT: s_mov_b32 s12, s14
11099 ; GFX940-NEXT: s_mov_b32 s13, s15
11100 ; GFX940-NEXT: s_mov_b32 s14, s2
11101 ; GFX940-NEXT: s_mov_b32 s15, s3
11102 ; GFX940-NEXT: ;;#ASMSTART
11103 ; GFX940-NEXT: ; use s[8:15]
11104 ; GFX940-NEXT: ;;#ASMEND
11105 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11106 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11107 %vec1 = call <3 x i64> asm "; def $0", "=s"()
11108 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 1>
11109 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle test with mask <poison, 2, 2, 2> and a poison second
; operand. Lane 0 is unspecified and lanes 1-3 all take element 2 of %vec0.
; The input comes from inline asm with an "=s" constraint and the <4 x i64>
; result is consumed by an inline asm operand pinned to s[8:15]. One shared
; set of autogenerated assertions covers every RUN line for this function.
11113 define void @s_shuffle_v4i64_v3i64__u_2_2_2() {
11114 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_2_2_2:
11116 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11117 ; GFX9-NEXT: ;;#ASMSTART
11118 ; GFX9-NEXT: ; def s[8:13]
11119 ; GFX9-NEXT: ;;#ASMEND
11120 ; GFX9-NEXT: s_mov_b32 s10, s12
11121 ; GFX9-NEXT: s_mov_b32 s11, s13
11122 ; GFX9-NEXT: s_mov_b32 s14, s12
11123 ; GFX9-NEXT: s_mov_b32 s15, s13
11124 ; GFX9-NEXT: ;;#ASMSTART
11125 ; GFX9-NEXT: ; use s[8:15]
11126 ; GFX9-NEXT: ;;#ASMEND
11127 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11128 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11129 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
11130 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle test with mask <0, 2, 2, 2> and a poison second
; operand. Lane 0 takes element 0 of %vec0 and lanes 1-3 all take element 2.
; The input comes from inline asm with an "=s" constraint and the <4 x i64>
; result is consumed by an inline asm operand pinned to s[8:15]. The expected
; code defines the input directly in s[8:13], so lane 0 needs no copy.
11134 define void @s_shuffle_v4i64_v3i64__0_2_2_2() {
11135 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_2_2_2:
11137 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11138 ; GFX9-NEXT: ;;#ASMSTART
11139 ; GFX9-NEXT: ; def s[8:13]
11140 ; GFX9-NEXT: ;;#ASMEND
11141 ; GFX9-NEXT: s_mov_b32 s10, s12
11142 ; GFX9-NEXT: s_mov_b32 s11, s13
11143 ; GFX9-NEXT: s_mov_b32 s14, s12
11144 ; GFX9-NEXT: s_mov_b32 s15, s13
11145 ; GFX9-NEXT: ;;#ASMSTART
11146 ; GFX9-NEXT: ; use s[8:15]
11147 ; GFX9-NEXT: ;;#ASMEND
11148 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11149 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11150 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
11151 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle test with mask <1, 2, 2, 2> and a poison second
; operand. Lane 0 takes element 1 of %vec0 and lanes 1-3 all take element 2.
; The input comes from inline asm with an "=s" constraint and the <4 x i64>
; result is consumed by an inline asm operand pinned to s[8:15]. One shared
; set of autogenerated assertions covers every RUN line for this function.
11155 define void @s_shuffle_v4i64_v3i64__1_2_2_2() {
11156 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_2_2_2:
11158 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11159 ; GFX9-NEXT: ;;#ASMSTART
11160 ; GFX9-NEXT: ; def s[8:13]
11161 ; GFX9-NEXT: ;;#ASMEND
11162 ; GFX9-NEXT: s_mov_b32 s8, s10
11163 ; GFX9-NEXT: s_mov_b32 s9, s11
11164 ; GFX9-NEXT: s_mov_b32 s10, s12
11165 ; GFX9-NEXT: s_mov_b32 s11, s13
11166 ; GFX9-NEXT: s_mov_b32 s14, s12
11167 ; GFX9-NEXT: s_mov_b32 s15, s13
11168 ; GFX9-NEXT: ;;#ASMSTART
11169 ; GFX9-NEXT: ; use s[8:15]
11170 ; GFX9-NEXT: ;;#ASMEND
11171 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11172 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11173 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
11174 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle test with mask <2, 2, 2, 2> and a poison second
; operand, so all four lanes take element 2 of %vec0. The input comes from
; inline asm with an "=s" constraint and the <4 x i64> result is consumed by
; an inline asm operand pinned to s[8:15]. One shared set of autogenerated
; assertions covers every RUN line for this function.
11178 define void @s_shuffle_v4i64_v3i64__2_2_2_2() {
11179 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_2_2_2:
11181 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11182 ; GFX9-NEXT: ;;#ASMSTART
11183 ; GFX9-NEXT: ; def s[8:13]
11184 ; GFX9-NEXT: ;;#ASMEND
11185 ; GFX9-NEXT: s_mov_b32 s8, s12
11186 ; GFX9-NEXT: s_mov_b32 s9, s13
11187 ; GFX9-NEXT: s_mov_b32 s10, s12
11188 ; GFX9-NEXT: s_mov_b32 s11, s13
11189 ; GFX9-NEXT: s_mov_b32 s14, s12
11190 ; GFX9-NEXT: s_mov_b32 s15, s13
11191 ; GFX9-NEXT: ;;#ASMSTART
11192 ; GFX9-NEXT: ; use s[8:15]
11193 ; GFX9-NEXT: ;;#ASMEND
11194 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11195 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11196 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
11197 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle test with mask <3, 2, 2, 2> and a poison second
; operand. Mask index 3 refers to element 0 of that poison operand, and the
; expected code contains no copy into s8/s9 for lane 0; lanes 1-3 all take
; element 2 of %vec0. The input comes from inline asm with an "=s" constraint
; and the <4 x i64> result is consumed by an inline asm operand pinned to
; s[8:15].
11201 define void @s_shuffle_v4i64_v3i64__3_2_2_2() {
11202 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_2_2_2:
11204 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11205 ; GFX9-NEXT: ;;#ASMSTART
11206 ; GFX9-NEXT: ; def s[8:13]
11207 ; GFX9-NEXT: ;;#ASMEND
11208 ; GFX9-NEXT: s_mov_b32 s10, s12
11209 ; GFX9-NEXT: s_mov_b32 s11, s13
11210 ; GFX9-NEXT: s_mov_b32 s14, s12
11211 ; GFX9-NEXT: s_mov_b32 s15, s13
11212 ; GFX9-NEXT: ;;#ASMSTART
11213 ; GFX9-NEXT: ; use s[8:15]
11214 ; GFX9-NEXT: ;;#ASMEND
11215 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11216 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11217 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
11218 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle test with mask <4, 2, 2, 2>. Mask indices 0-2 select
; from %vec0 and 3-5 from %vec1, so lane 0 takes element 1 of %vec1 and lanes
; 1-3 all take element 2 of %vec0. Both inputs come from inline asm with an
; "=s" constraint and the <4 x i64> result is consumed by an inline asm
; operand pinned to s[8:15]. The assertions that follow are autogenerated
; (see the NOTE on the first line of the file).
11222 define void @s_shuffle_v4i64_v3i64__4_2_2_2() {
11223 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_2_2_2:
11225 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11226 ; GFX900-NEXT: ;;#ASMSTART
11227 ; GFX900-NEXT: ; def s[8:13]
11228 ; GFX900-NEXT: ;;#ASMEND
11229 ; GFX900-NEXT: ;;#ASMSTART
11230 ; GFX900-NEXT: ; def s[4:9]
11231 ; GFX900-NEXT: ;;#ASMEND
11232 ; GFX900-NEXT: s_mov_b32 s8, s6
11233 ; GFX900-NEXT: s_mov_b32 s9, s7
11234 ; GFX900-NEXT: s_mov_b32 s10, s12
11235 ; GFX900-NEXT: s_mov_b32 s11, s13
11236 ; GFX900-NEXT: s_mov_b32 s14, s12
11237 ; GFX900-NEXT: s_mov_b32 s15, s13
11238 ; GFX900-NEXT: ;;#ASMSTART
11239 ; GFX900-NEXT: ; use s[8:15]
11240 ; GFX900-NEXT: ;;#ASMEND
11241 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11243 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_2_2_2:
11245 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11246 ; GFX90A-NEXT: ;;#ASMSTART
11247 ; GFX90A-NEXT: ; def s[8:13]
11248 ; GFX90A-NEXT: ;;#ASMEND
11249 ; GFX90A-NEXT: ;;#ASMSTART
11250 ; GFX90A-NEXT: ; def s[4:9]
11251 ; GFX90A-NEXT: ;;#ASMEND
11252 ; GFX90A-NEXT: s_mov_b32 s8, s6
11253 ; GFX90A-NEXT: s_mov_b32 s9, s7
11254 ; GFX90A-NEXT: s_mov_b32 s10, s12
11255 ; GFX90A-NEXT: s_mov_b32 s11, s13
11256 ; GFX90A-NEXT: s_mov_b32 s14, s12
11257 ; GFX90A-NEXT: s_mov_b32 s15, s13
11258 ; GFX90A-NEXT: ;;#ASMSTART
11259 ; GFX90A-NEXT: ; use s[8:15]
11260 ; GFX90A-NEXT: ;;#ASMEND
11261 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11263 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_2_2_2:
11265 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11266 ; GFX940-NEXT: ;;#ASMSTART
11267 ; GFX940-NEXT: ; def s[8:13]
11268 ; GFX940-NEXT: ;;#ASMEND
11269 ; GFX940-NEXT: ;;#ASMSTART
11270 ; GFX940-NEXT: ; def s[0:5]
11271 ; GFX940-NEXT: ;;#ASMEND
11272 ; GFX940-NEXT: s_mov_b32 s8, s2
11273 ; GFX940-NEXT: s_mov_b32 s9, s3
11274 ; GFX940-NEXT: s_mov_b32 s10, s12
11275 ; GFX940-NEXT: s_mov_b32 s11, s13
11276 ; GFX940-NEXT: s_mov_b32 s14, s12
11277 ; GFX940-NEXT: s_mov_b32 s15, s13
11278 ; GFX940-NEXT: ;;#ASMSTART
11279 ; GFX940-NEXT: ; use s[8:15]
11280 ; GFX940-NEXT: ;;#ASMEND
11281 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11282 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11283 %vec1 = call <3 x i64> asm "; def $0", "=s"()
11284 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
11285 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle test with mask <5, 2, 2, 2>. Mask indices 0-2 select
; from %vec0 and 3-5 from %vec1, so lane 0 takes element 2 of %vec1 and lanes
; 1-3 all take element 2 of %vec0. Both inputs come from inline asm with an
; "=s" constraint and the <4 x i64> result is consumed by an inline asm
; operand pinned to s[8:15]. The assertions that follow are autogenerated
; (see the NOTE on the first line of the file).
11289 define void @s_shuffle_v4i64_v3i64__5_2_2_2() {
11290 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_2_2:
11292 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11293 ; GFX900-NEXT: ;;#ASMSTART
11294 ; GFX900-NEXT: ; def s[8:13]
11295 ; GFX900-NEXT: ;;#ASMEND
11296 ; GFX900-NEXT: ;;#ASMSTART
11297 ; GFX900-NEXT: ; def s[4:9]
11298 ; GFX900-NEXT: ;;#ASMEND
11299 ; GFX900-NEXT: s_mov_b32 s10, s12
11300 ; GFX900-NEXT: s_mov_b32 s11, s13
11301 ; GFX900-NEXT: s_mov_b32 s14, s12
11302 ; GFX900-NEXT: s_mov_b32 s15, s13
11303 ; GFX900-NEXT: ;;#ASMSTART
11304 ; GFX900-NEXT: ; use s[8:15]
11305 ; GFX900-NEXT: ;;#ASMEND
11306 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11308 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_2_2:
11310 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11311 ; GFX90A-NEXT: ;;#ASMSTART
11312 ; GFX90A-NEXT: ; def s[8:13]
11313 ; GFX90A-NEXT: ;;#ASMEND
11314 ; GFX90A-NEXT: ;;#ASMSTART
11315 ; GFX90A-NEXT: ; def s[4:9]
11316 ; GFX90A-NEXT: ;;#ASMEND
11317 ; GFX90A-NEXT: s_mov_b32 s10, s12
11318 ; GFX90A-NEXT: s_mov_b32 s11, s13
11319 ; GFX90A-NEXT: s_mov_b32 s14, s12
11320 ; GFX90A-NEXT: s_mov_b32 s15, s13
11321 ; GFX90A-NEXT: ;;#ASMSTART
11322 ; GFX90A-NEXT: ; use s[8:15]
11323 ; GFX90A-NEXT: ;;#ASMEND
11324 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11326 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_2_2:
11328 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11329 ; GFX940-NEXT: ;;#ASMSTART
11330 ; GFX940-NEXT: ; def s[8:13]
11331 ; GFX940-NEXT: ;;#ASMEND
11332 ; GFX940-NEXT: ;;#ASMSTART
11333 ; GFX940-NEXT: ; def s[0:5]
11334 ; GFX940-NEXT: ;;#ASMEND
11335 ; GFX940-NEXT: s_mov_b32 s8, s4
11336 ; GFX940-NEXT: s_mov_b32 s9, s5
11337 ; GFX940-NEXT: s_mov_b32 s10, s12
11338 ; GFX940-NEXT: s_mov_b32 s11, s13
11339 ; GFX940-NEXT: s_mov_b32 s14, s12
11340 ; GFX940-NEXT: s_mov_b32 s15, s13
11341 ; GFX940-NEXT: ;;#ASMSTART
11342 ; GFX940-NEXT: ; use s[8:15]
11343 ; GFX940-NEXT: ;;#ASMEND
11344 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11345 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11346 %vec1 = call <3 x i64> asm "; def $0", "=s"()
11347 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
11348 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle test with mask <5, poison, 2, 2>. Mask indices 0-2
; select from %vec0 and 3-5 from %vec1, so lane 0 takes element 2 of %vec1,
; lane 1 is unspecified and lanes 2 and 3 take element 2 of %vec0. Both inputs
; come from inline asm with an "=s" constraint and the <4 x i64> result is
; consumed by an inline asm operand pinned to s[8:15]. The assertions that
; follow are autogenerated (see the NOTE on the first line of the file).
11352 define void @s_shuffle_v4i64_v3i64__5_u_2_2() {
11353 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_2_2:
11355 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11356 ; GFX900-NEXT: ;;#ASMSTART
11357 ; GFX900-NEXT: ; def s[8:13]
11358 ; GFX900-NEXT: ;;#ASMEND
11359 ; GFX900-NEXT: ;;#ASMSTART
11360 ; GFX900-NEXT: ; def s[4:9]
11361 ; GFX900-NEXT: ;;#ASMEND
11362 ; GFX900-NEXT: s_mov_b32 s14, s12
11363 ; GFX900-NEXT: s_mov_b32 s15, s13
11364 ; GFX900-NEXT: ;;#ASMSTART
11365 ; GFX900-NEXT: ; use s[8:15]
11366 ; GFX900-NEXT: ;;#ASMEND
11367 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11369 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_2_2:
11371 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11372 ; GFX90A-NEXT: ;;#ASMSTART
11373 ; GFX90A-NEXT: ; def s[8:13]
11374 ; GFX90A-NEXT: ;;#ASMEND
11375 ; GFX90A-NEXT: ;;#ASMSTART
11376 ; GFX90A-NEXT: ; def s[4:9]
11377 ; GFX90A-NEXT: ;;#ASMEND
11378 ; GFX90A-NEXT: s_mov_b32 s14, s12
11379 ; GFX90A-NEXT: s_mov_b32 s15, s13
11380 ; GFX90A-NEXT: ;;#ASMSTART
11381 ; GFX90A-NEXT: ; use s[8:15]
11382 ; GFX90A-NEXT: ;;#ASMEND
11383 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11385 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_2_2:
11387 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11388 ; GFX940-NEXT: ;;#ASMSTART
11389 ; GFX940-NEXT: ; def s[8:13]
11390 ; GFX940-NEXT: ;;#ASMEND
11391 ; GFX940-NEXT: ;;#ASMSTART
11392 ; GFX940-NEXT: ; def s[0:5]
11393 ; GFX940-NEXT: ;;#ASMEND
11394 ; GFX940-NEXT: s_mov_b32 s8, s4
11395 ; GFX940-NEXT: s_mov_b32 s9, s5
11396 ; GFX940-NEXT: s_mov_b32 s14, s12
11397 ; GFX940-NEXT: s_mov_b32 s15, s13
11398 ; GFX940-NEXT: ;;#ASMSTART
11399 ; GFX940-NEXT: ; use s[8:15]
11400 ; GFX940-NEXT: ;;#ASMEND
11401 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11402 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11403 %vec1 = call <3 x i64> asm "; def $0", "=s"()
11404 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 2, i32 2>
11405 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle test with mask <5, 0, 2, 2>. Mask indices 0-2 select
; from %vec0 and 3-5 from %vec1, so lane 0 takes element 2 of %vec1, lane 1
; takes element 0 of %vec0 and lanes 2 and 3 take element 2 of %vec0. Both
; inputs come from inline asm with an "=s" constraint and the <4 x i64> result
; is consumed by an inline asm operand pinned to s[8:15]. The assertions that
; follow are autogenerated (see the NOTE on the first line of the file).
11409 define void @s_shuffle_v4i64_v3i64__5_0_2_2() {
11410 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_2_2:
11412 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11413 ; GFX900-NEXT: ;;#ASMSTART
11414 ; GFX900-NEXT: ; def s[12:17]
11415 ; GFX900-NEXT: ;;#ASMEND
11416 ; GFX900-NEXT: ;;#ASMSTART
11417 ; GFX900-NEXT: ; def s[4:9]
11418 ; GFX900-NEXT: ;;#ASMEND
11419 ; GFX900-NEXT: s_mov_b32 s10, s12
11420 ; GFX900-NEXT: s_mov_b32 s11, s13
11421 ; GFX900-NEXT: s_mov_b32 s12, s16
11422 ; GFX900-NEXT: s_mov_b32 s13, s17
11423 ; GFX900-NEXT: s_mov_b32 s14, s16
11424 ; GFX900-NEXT: s_mov_b32 s15, s17
11425 ; GFX900-NEXT: ;;#ASMSTART
11426 ; GFX900-NEXT: ; use s[8:15]
11427 ; GFX900-NEXT: ;;#ASMEND
11428 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11430 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_2_2:
11432 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11433 ; GFX90A-NEXT: ;;#ASMSTART
11434 ; GFX90A-NEXT: ; def s[12:17]
11435 ; GFX90A-NEXT: ;;#ASMEND
11436 ; GFX90A-NEXT: ;;#ASMSTART
11437 ; GFX90A-NEXT: ; def s[4:9]
11438 ; GFX90A-NEXT: ;;#ASMEND
11439 ; GFX90A-NEXT: s_mov_b32 s10, s12
11440 ; GFX90A-NEXT: s_mov_b32 s11, s13
11441 ; GFX90A-NEXT: s_mov_b32 s12, s16
11442 ; GFX90A-NEXT: s_mov_b32 s13, s17
11443 ; GFX90A-NEXT: s_mov_b32 s14, s16
11444 ; GFX90A-NEXT: s_mov_b32 s15, s17
11445 ; GFX90A-NEXT: ;;#ASMSTART
11446 ; GFX90A-NEXT: ; use s[8:15]
11447 ; GFX90A-NEXT: ;;#ASMEND
11448 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11450 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_2_2:
11452 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11453 ; GFX940-NEXT: ;;#ASMSTART
11454 ; GFX940-NEXT: ; def s[8:13]
11455 ; GFX940-NEXT: ;;#ASMEND
11456 ; GFX940-NEXT: ;;#ASMSTART
11457 ; GFX940-NEXT: ; def s[0:5]
11458 ; GFX940-NEXT: ;;#ASMEND
11459 ; GFX940-NEXT: s_mov_b32 s8, s12
11460 ; GFX940-NEXT: s_mov_b32 s9, s13
11461 ; GFX940-NEXT: s_mov_b32 s10, s0
11462 ; GFX940-NEXT: s_mov_b32 s11, s1
11463 ; GFX940-NEXT: s_mov_b32 s12, s4
11464 ; GFX940-NEXT: s_mov_b32 s13, s5
11465 ; GFX940-NEXT: s_mov_b32 s14, s4
11466 ; GFX940-NEXT: s_mov_b32 s15, s5
11467 ; GFX940-NEXT: ;;#ASMSTART
11468 ; GFX940-NEXT: ; use s[8:15]
11469 ; GFX940-NEXT: ;;#ASMEND
11470 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11471 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11472 %vec1 = call <3 x i64> asm "; def $0", "=s"()
11473 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 2, i32 2>
11474 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle test with mask <5, 1, 2, 2>. Mask indices 0-2 select
; from %vec0 and 3-5 from %vec1, so lane 0 takes element 2 of %vec1, lane 1
; takes element 1 of %vec0 and lanes 2 and 3 take element 2 of %vec0. Both
; inputs come from inline asm with an "=s" constraint and the <4 x i64> result
; is consumed by an inline asm operand pinned to s[8:15]. The assertions that
; follow are autogenerated (see the NOTE on the first line of the file).
11478 define void @s_shuffle_v4i64_v3i64__5_1_2_2() {
11479 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_2_2:
11481 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11482 ; GFX900-NEXT: ;;#ASMSTART
11483 ; GFX900-NEXT: ; def s[8:13]
11484 ; GFX900-NEXT: ;;#ASMEND
11485 ; GFX900-NEXT: ;;#ASMSTART
11486 ; GFX900-NEXT: ; def s[4:9]
11487 ; GFX900-NEXT: ;;#ASMEND
11488 ; GFX900-NEXT: s_mov_b32 s14, s12
11489 ; GFX900-NEXT: s_mov_b32 s15, s13
11490 ; GFX900-NEXT: ;;#ASMSTART
11491 ; GFX900-NEXT: ; use s[8:15]
11492 ; GFX900-NEXT: ;;#ASMEND
11493 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11495 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_2_2:
11497 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11498 ; GFX90A-NEXT: ;;#ASMSTART
11499 ; GFX90A-NEXT: ; def s[8:13]
11500 ; GFX90A-NEXT: ;;#ASMEND
11501 ; GFX90A-NEXT: ;;#ASMSTART
11502 ; GFX90A-NEXT: ; def s[4:9]
11503 ; GFX90A-NEXT: ;;#ASMEND
11504 ; GFX90A-NEXT: s_mov_b32 s14, s12
11505 ; GFX90A-NEXT: s_mov_b32 s15, s13
11506 ; GFX90A-NEXT: ;;#ASMSTART
11507 ; GFX90A-NEXT: ; use s[8:15]
11508 ; GFX90A-NEXT: ;;#ASMEND
11509 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11511 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_2_2:
11513 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11514 ; GFX940-NEXT: ;;#ASMSTART
11515 ; GFX940-NEXT: ; def s[8:13]
11516 ; GFX940-NEXT: ;;#ASMEND
11517 ; GFX940-NEXT: ;;#ASMSTART
11518 ; GFX940-NEXT: ; def s[0:5]
11519 ; GFX940-NEXT: ;;#ASMEND
11520 ; GFX940-NEXT: s_mov_b32 s8, s4
11521 ; GFX940-NEXT: s_mov_b32 s9, s5
11522 ; GFX940-NEXT: s_mov_b32 s14, s12
11523 ; GFX940-NEXT: s_mov_b32 s15, s13
11524 ; GFX940-NEXT: ;;#ASMSTART
11525 ; GFX940-NEXT: ; use s[8:15]
11526 ; GFX940-NEXT: ;;#ASMEND
11527 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11528 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11529 %vec1 = call <3 x i64> asm "; def $0", "=s"()
11530 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 2, i32 2>
11531 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle test with mask <5, 3, 2, 2>. Mask indices 0-2 select
; from %vec0 and 3-5 from %vec1, so lane 0 takes element 2 of %vec1, lane 1
; takes element 0 of %vec1 and lanes 2 and 3 take element 2 of %vec0. Both
; inputs come from inline asm with an "=s" constraint and the <4 x i64> result
; is consumed by an inline asm operand pinned to s[8:15]. The assertions that
; follow are autogenerated (see the NOTE on the first line of the file).
11535 define void @s_shuffle_v4i64_v3i64__5_3_2_2() {
11536 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_2_2:
11538 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11539 ; GFX900-NEXT: ;;#ASMSTART
11540 ; GFX900-NEXT: ; def s[8:13]
11541 ; GFX900-NEXT: ;;#ASMEND
11542 ; GFX900-NEXT: ;;#ASMSTART
11543 ; GFX900-NEXT: ; def s[4:9]
11544 ; GFX900-NEXT: ;;#ASMEND
11545 ; GFX900-NEXT: s_mov_b32 s10, s4
11546 ; GFX900-NEXT: s_mov_b32 s11, s5
11547 ; GFX900-NEXT: s_mov_b32 s14, s12
11548 ; GFX900-NEXT: s_mov_b32 s15, s13
11549 ; GFX900-NEXT: ;;#ASMSTART
11550 ; GFX900-NEXT: ; use s[8:15]
11551 ; GFX900-NEXT: ;;#ASMEND
11552 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11554 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_2_2:
11556 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11557 ; GFX90A-NEXT: ;;#ASMSTART
11558 ; GFX90A-NEXT: ; def s[8:13]
11559 ; GFX90A-NEXT: ;;#ASMEND
11560 ; GFX90A-NEXT: ;;#ASMSTART
11561 ; GFX90A-NEXT: ; def s[4:9]
11562 ; GFX90A-NEXT: ;;#ASMEND
11563 ; GFX90A-NEXT: s_mov_b32 s10, s4
11564 ; GFX90A-NEXT: s_mov_b32 s11, s5
11565 ; GFX90A-NEXT: s_mov_b32 s14, s12
11566 ; GFX90A-NEXT: s_mov_b32 s15, s13
11567 ; GFX90A-NEXT: ;;#ASMSTART
11568 ; GFX90A-NEXT: ; use s[8:15]
11569 ; GFX90A-NEXT: ;;#ASMEND
11570 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11572 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_2_2:
11574 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11575 ; GFX940-NEXT: ;;#ASMSTART
11576 ; GFX940-NEXT: ; def s[8:13]
11577 ; GFX940-NEXT: ;;#ASMEND
11578 ; GFX940-NEXT: ;;#ASMSTART
11579 ; GFX940-NEXT: ; def s[0:5]
11580 ; GFX940-NEXT: ;;#ASMEND
11581 ; GFX940-NEXT: s_mov_b32 s8, s4
11582 ; GFX940-NEXT: s_mov_b32 s9, s5
11583 ; GFX940-NEXT: s_mov_b32 s10, s0
11584 ; GFX940-NEXT: s_mov_b32 s11, s1
11585 ; GFX940-NEXT: s_mov_b32 s14, s12
11586 ; GFX940-NEXT: s_mov_b32 s15, s13
11587 ; GFX940-NEXT: ;;#ASMSTART
11588 ; GFX940-NEXT: ; use s[8:15]
11589 ; GFX940-NEXT: ;;#ASMEND
11590 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11591 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11592 %vec1 = call <3 x i64> asm "; def $0", "=s"()
11593 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 2, i32 2>
11594 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle test with mask <5, 4, 2, 2>. Mask indices 0-2 select
; from %vec0 and 3-5 from %vec1, so lane 0 takes element 2 of %vec1, lane 1
; takes element 1 of %vec1 and lanes 2 and 3 take element 2 of %vec0. Both
; inputs come from inline asm with an "=s" constraint and the <4 x i64> result
; is consumed by an inline asm operand pinned to s[8:15]. The assertions that
; follow are autogenerated (see the NOTE on the first line of the file).
11598 define void @s_shuffle_v4i64_v3i64__5_4_2_2() {
11599 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_2_2:
11601 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11602 ; GFX900-NEXT: ;;#ASMSTART
11603 ; GFX900-NEXT: ; def s[12:17]
11604 ; GFX900-NEXT: ;;#ASMEND
11605 ; GFX900-NEXT: ;;#ASMSTART
11606 ; GFX900-NEXT: ; def s[8:13]
11607 ; GFX900-NEXT: ;;#ASMEND
11608 ; GFX900-NEXT: s_mov_b32 s8, s12
11609 ; GFX900-NEXT: s_mov_b32 s9, s13
11610 ; GFX900-NEXT: s_mov_b32 s12, s16
11611 ; GFX900-NEXT: s_mov_b32 s13, s17
11612 ; GFX900-NEXT: s_mov_b32 s14, s16
11613 ; GFX900-NEXT: s_mov_b32 s15, s17
11614 ; GFX900-NEXT: ;;#ASMSTART
11615 ; GFX900-NEXT: ; use s[8:15]
11616 ; GFX900-NEXT: ;;#ASMEND
11617 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11619 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_2_2:
11621 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11622 ; GFX90A-NEXT: ;;#ASMSTART
11623 ; GFX90A-NEXT: ; def s[12:17]
11624 ; GFX90A-NEXT: ;;#ASMEND
11625 ; GFX90A-NEXT: ;;#ASMSTART
11626 ; GFX90A-NEXT: ; def s[8:13]
11627 ; GFX90A-NEXT: ;;#ASMEND
11628 ; GFX90A-NEXT: s_mov_b32 s8, s12
11629 ; GFX90A-NEXT: s_mov_b32 s9, s13
11630 ; GFX90A-NEXT: s_mov_b32 s12, s16
11631 ; GFX90A-NEXT: s_mov_b32 s13, s17
11632 ; GFX90A-NEXT: s_mov_b32 s14, s16
11633 ; GFX90A-NEXT: s_mov_b32 s15, s17
11634 ; GFX90A-NEXT: ;;#ASMSTART
11635 ; GFX90A-NEXT: ; use s[8:15]
11636 ; GFX90A-NEXT: ;;#ASMEND
11637 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11639 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_4_2_2:
11641 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11642 ; GFX940-NEXT: ;;#ASMSTART
11643 ; GFX940-NEXT: ; def s[8:13]
11644 ; GFX940-NEXT: ;;#ASMEND
11645 ; GFX940-NEXT: ;;#ASMSTART
11646 ; GFX940-NEXT: ; def s[0:5]
11647 ; GFX940-NEXT: ;;#ASMEND
11648 ; GFX940-NEXT: s_mov_b32 s8, s12
11649 ; GFX940-NEXT: s_mov_b32 s9, s13
11650 ; GFX940-NEXT: s_mov_b32 s12, s4
11651 ; GFX940-NEXT: s_mov_b32 s13, s5
11652 ; GFX940-NEXT: s_mov_b32 s14, s4
11653 ; GFX940-NEXT: s_mov_b32 s15, s5
11654 ; GFX940-NEXT: ;;#ASMSTART
11655 ; GFX940-NEXT: ; use s[8:15]
11656 ; GFX940-NEXT: ;;#ASMEND
11657 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11658 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11659 %vec1 = call <3 x i64> asm "; def $0", "=s"()
11660 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 2, i32 2>
11661 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle test with mask <5, 5, 2, 2>. Mask indices 0-2 select
; from %vec0 and 3-5 from %vec1, so lanes 0 and 1 both take element 2 of %vec1
; and lanes 2 and 3 both take element 2 of %vec0. Both inputs come from inline
; asm with an "=s" constraint and the <4 x i64> result is consumed by an
; inline asm operand pinned to s[8:15]. The assertions that follow are
; autogenerated (see the NOTE on the first line of the file).
11665 define void @s_shuffle_v4i64_v3i64__5_5_2_2() {
11666 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_2:
11668 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11669 ; GFX900-NEXT: ;;#ASMSTART
11670 ; GFX900-NEXT: ; def s[8:13]
11671 ; GFX900-NEXT: ;;#ASMEND
11672 ; GFX900-NEXT: ;;#ASMSTART
11673 ; GFX900-NEXT: ; def s[16:21]
11674 ; GFX900-NEXT: ;;#ASMEND
11675 ; GFX900-NEXT: s_mov_b32 s8, s20
11676 ; GFX900-NEXT: s_mov_b32 s9, s21
11677 ; GFX900-NEXT: s_mov_b32 s10, s20
11678 ; GFX900-NEXT: s_mov_b32 s11, s21
11679 ; GFX900-NEXT: s_mov_b32 s14, s12
11680 ; GFX900-NEXT: s_mov_b32 s15, s13
11681 ; GFX900-NEXT: ;;#ASMSTART
11682 ; GFX900-NEXT: ; use s[8:15]
11683 ; GFX900-NEXT: ;;#ASMEND
11684 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11686 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_2:
11688 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11689 ; GFX90A-NEXT: ;;#ASMSTART
11690 ; GFX90A-NEXT: ; def s[8:13]
11691 ; GFX90A-NEXT: ;;#ASMEND
11692 ; GFX90A-NEXT: ;;#ASMSTART
11693 ; GFX90A-NEXT: ; def s[16:21]
11694 ; GFX90A-NEXT: ;;#ASMEND
11695 ; GFX90A-NEXT: s_mov_b32 s8, s20
11696 ; GFX90A-NEXT: s_mov_b32 s9, s21
11697 ; GFX90A-NEXT: s_mov_b32 s10, s20
11698 ; GFX90A-NEXT: s_mov_b32 s11, s21
11699 ; GFX90A-NEXT: s_mov_b32 s14, s12
11700 ; GFX90A-NEXT: s_mov_b32 s15, s13
11701 ; GFX90A-NEXT: ;;#ASMSTART
11702 ; GFX90A-NEXT: ; use s[8:15]
11703 ; GFX90A-NEXT: ;;#ASMEND
11704 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11706 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_2:
11708 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11709 ; GFX940-NEXT: ;;#ASMSTART
11710 ; GFX940-NEXT: ; def s[8:13]
11711 ; GFX940-NEXT: ;;#ASMEND
11712 ; GFX940-NEXT: ;;#ASMSTART
11713 ; GFX940-NEXT: ; def s[0:5]
11714 ; GFX940-NEXT: ;;#ASMEND
11715 ; GFX940-NEXT: s_mov_b32 s8, s4
11716 ; GFX940-NEXT: s_mov_b32 s9, s5
11717 ; GFX940-NEXT: s_mov_b32 s10, s4
11718 ; GFX940-NEXT: s_mov_b32 s11, s5
11719 ; GFX940-NEXT: s_mov_b32 s14, s12
11720 ; GFX940-NEXT: s_mov_b32 s15, s13
11721 ; GFX940-NEXT: ;;#ASMSTART
11722 ; GFX940-NEXT: ; use s[8:15]
11723 ; GFX940-NEXT: ;;#ASMEND
11724 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11725 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11726 %vec1 = call <3 x i64> asm "; def $0", "=s"()
11727 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 2>
11728 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle test with mask <5, 5, poison, 2>. Mask indices 0-2
; select from %vec0 and 3-5 from %vec1, so lanes 0 and 1 both take element 2
; of %vec1, lane 2 is unspecified and lane 3 takes element 2 of %vec0. Both
; inputs come from inline asm with an "=s" constraint and the <4 x i64> result
; is consumed by an inline asm operand pinned to s[8:15]. The assertions that
; follow are autogenerated (see the NOTE on the first line of the file).
11732 define void @s_shuffle_v4i64_v3i64__5_5_u_2() {
11733 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_2:
11735 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11736 ; GFX900-NEXT: ;;#ASMSTART
11737 ; GFX900-NEXT: ; def s[8:13]
11738 ; GFX900-NEXT: ;;#ASMEND
11739 ; GFX900-NEXT: ;;#ASMSTART
11740 ; GFX900-NEXT: ; def s[16:21]
11741 ; GFX900-NEXT: ;;#ASMEND
11742 ; GFX900-NEXT: s_mov_b32 s8, s20
11743 ; GFX900-NEXT: s_mov_b32 s9, s21
11744 ; GFX900-NEXT: s_mov_b32 s10, s20
11745 ; GFX900-NEXT: s_mov_b32 s11, s21
11746 ; GFX900-NEXT: s_mov_b32 s14, s12
11747 ; GFX900-NEXT: s_mov_b32 s15, s13
11748 ; GFX900-NEXT: ;;#ASMSTART
11749 ; GFX900-NEXT: ; use s[8:15]
11750 ; GFX900-NEXT: ;;#ASMEND
11751 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11753 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_2:
11755 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11756 ; GFX90A-NEXT: ;;#ASMSTART
11757 ; GFX90A-NEXT: ; def s[8:13]
11758 ; GFX90A-NEXT: ;;#ASMEND
11759 ; GFX90A-NEXT: ;;#ASMSTART
11760 ; GFX90A-NEXT: ; def s[16:21]
11761 ; GFX90A-NEXT: ;;#ASMEND
11762 ; GFX90A-NEXT: s_mov_b32 s8, s20
11763 ; GFX90A-NEXT: s_mov_b32 s9, s21
11764 ; GFX90A-NEXT: s_mov_b32 s10, s20
11765 ; GFX90A-NEXT: s_mov_b32 s11, s21
11766 ; GFX90A-NEXT: s_mov_b32 s14, s12
11767 ; GFX90A-NEXT: s_mov_b32 s15, s13
11768 ; GFX90A-NEXT: ;;#ASMSTART
11769 ; GFX90A-NEXT: ; use s[8:15]
11770 ; GFX90A-NEXT: ;;#ASMEND
11771 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11773 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_2:
11775 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11776 ; GFX940-NEXT: ;;#ASMSTART
11777 ; GFX940-NEXT: ; def s[8:13]
11778 ; GFX940-NEXT: ;;#ASMEND
11779 ; GFX940-NEXT: ;;#ASMSTART
11780 ; GFX940-NEXT: ; def s[0:5]
11781 ; GFX940-NEXT: ;;#ASMEND
11782 ; GFX940-NEXT: s_mov_b32 s8, s12
11783 ; GFX940-NEXT: s_mov_b32 s9, s13
11784 ; GFX940-NEXT: s_mov_b32 s10, s12
11785 ; GFX940-NEXT: s_mov_b32 s11, s13
11786 ; GFX940-NEXT: s_mov_b32 s14, s4
11787 ; GFX940-NEXT: s_mov_b32 s15, s5
11788 ; GFX940-NEXT: ;;#ASMSTART
11789 ; GFX940-NEXT: ; use s[8:15]
11790 ; GFX940-NEXT: ;;#ASMEND
11791 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11792 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11793 %vec1 = call <3 x i64> asm "; def $0", "=s"()
11794 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 2>
11795 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffles two <3 x i64> values produced by "=s" inline asm with mask
; <5, 5, 0, 2>: result lanes 0 and 1 take %vec1[2], lane 2 takes %vec0[0],
; and lane 3 takes %vec0[2]. The <4 x i64> result is consumed by inline asm
; constrained to s[8:15].
11799 define void @s_shuffle_v4i64_v3i64__5_5_0_2() {
11800 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_2:
11802 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11803 ; GFX900-NEXT: ;;#ASMSTART
11804 ; GFX900-NEXT: ; def s[12:17]
11805 ; GFX900-NEXT: ;;#ASMEND
11806 ; GFX900-NEXT: ;;#ASMSTART
11807 ; GFX900-NEXT: ; def s[20:25]
11808 ; GFX900-NEXT: ;;#ASMEND
11809 ; GFX900-NEXT: s_mov_b32 s8, s24
11810 ; GFX900-NEXT: s_mov_b32 s9, s25
11811 ; GFX900-NEXT: s_mov_b32 s10, s24
11812 ; GFX900-NEXT: s_mov_b32 s11, s25
11813 ; GFX900-NEXT: s_mov_b32 s14, s16
11814 ; GFX900-NEXT: s_mov_b32 s15, s17
11815 ; GFX900-NEXT: ;;#ASMSTART
11816 ; GFX900-NEXT: ; use s[8:15]
11817 ; GFX900-NEXT: ;;#ASMEND
11818 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11820 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_2:
11822 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11823 ; GFX90A-NEXT: ;;#ASMSTART
11824 ; GFX90A-NEXT: ; def s[12:17]
11825 ; GFX90A-NEXT: ;;#ASMEND
11826 ; GFX90A-NEXT: ;;#ASMSTART
11827 ; GFX90A-NEXT: ; def s[20:25]
11828 ; GFX90A-NEXT: ;;#ASMEND
11829 ; GFX90A-NEXT: s_mov_b32 s8, s24
11830 ; GFX90A-NEXT: s_mov_b32 s9, s25
11831 ; GFX90A-NEXT: s_mov_b32 s10, s24
11832 ; GFX90A-NEXT: s_mov_b32 s11, s25
11833 ; GFX90A-NEXT: s_mov_b32 s14, s16
11834 ; GFX90A-NEXT: s_mov_b32 s15, s17
11835 ; GFX90A-NEXT: ;;#ASMSTART
11836 ; GFX90A-NEXT: ; use s[8:15]
11837 ; GFX90A-NEXT: ;;#ASMEND
11838 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11840 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_2:
11842 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11843 ; GFX940-NEXT: ;;#ASMSTART
11844 ; GFX940-NEXT: ; def s[8:13]
11845 ; GFX940-NEXT: ;;#ASMEND
11846 ; GFX940-NEXT: ;;#ASMSTART
11847 ; GFX940-NEXT: ; def s[0:5]
11848 ; GFX940-NEXT: ;;#ASMEND
11849 ; GFX940-NEXT: s_mov_b32 s8, s12
11850 ; GFX940-NEXT: s_mov_b32 s9, s13
11851 ; GFX940-NEXT: s_mov_b32 s10, s12
11852 ; GFX940-NEXT: s_mov_b32 s11, s13
11853 ; GFX940-NEXT: s_mov_b32 s12, s0
11854 ; GFX940-NEXT: s_mov_b32 s13, s1
11855 ; GFX940-NEXT: s_mov_b32 s14, s4
11856 ; GFX940-NEXT: s_mov_b32 s15, s5
11857 ; GFX940-NEXT: ;;#ASMSTART
11858 ; GFX940-NEXT: ; use s[8:15]
11859 ; GFX940-NEXT: ;;#ASMEND
11860 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11861 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11862 %vec1 = call <3 x i64> asm "; def $0", "=s"()
11863 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 2>
11864 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffles two <3 x i64> values produced by "=s" inline asm with mask
; <5, 5, 1, 2>: result lanes 0 and 1 take %vec1[2], lane 2 takes %vec0[1],
; and lane 3 takes %vec0[2]. The <4 x i64> result is consumed by inline asm
; constrained to s[8:15].
11868 define void @s_shuffle_v4i64_v3i64__5_5_1_2() {
11869 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_2:
11871 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11872 ; GFX900-NEXT: ;;#ASMSTART
11873 ; GFX900-NEXT: ; def s[12:17]
11874 ; GFX900-NEXT: ;;#ASMEND
11875 ; GFX900-NEXT: ;;#ASMSTART
11876 ; GFX900-NEXT: ; def s[8:13]
11877 ; GFX900-NEXT: ;;#ASMEND
11878 ; GFX900-NEXT: s_mov_b32 s8, s12
11879 ; GFX900-NEXT: s_mov_b32 s9, s13
11880 ; GFX900-NEXT: s_mov_b32 s10, s12
11881 ; GFX900-NEXT: s_mov_b32 s11, s13
11882 ; GFX900-NEXT: s_mov_b32 s12, s14
11883 ; GFX900-NEXT: s_mov_b32 s13, s15
11884 ; GFX900-NEXT: s_mov_b32 s14, s16
11885 ; GFX900-NEXT: s_mov_b32 s15, s17
11886 ; GFX900-NEXT: ;;#ASMSTART
11887 ; GFX900-NEXT: ; use s[8:15]
11888 ; GFX900-NEXT: ;;#ASMEND
11889 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11891 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_2:
11893 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11894 ; GFX90A-NEXT: ;;#ASMSTART
11895 ; GFX90A-NEXT: ; def s[12:17]
11896 ; GFX90A-NEXT: ;;#ASMEND
11897 ; GFX90A-NEXT: ;;#ASMSTART
11898 ; GFX90A-NEXT: ; def s[8:13]
11899 ; GFX90A-NEXT: ;;#ASMEND
11900 ; GFX90A-NEXT: s_mov_b32 s8, s12
11901 ; GFX90A-NEXT: s_mov_b32 s9, s13
11902 ; GFX90A-NEXT: s_mov_b32 s10, s12
11903 ; GFX90A-NEXT: s_mov_b32 s11, s13
11904 ; GFX90A-NEXT: s_mov_b32 s12, s14
11905 ; GFX90A-NEXT: s_mov_b32 s13, s15
11906 ; GFX90A-NEXT: s_mov_b32 s14, s16
11907 ; GFX90A-NEXT: s_mov_b32 s15, s17
11908 ; GFX90A-NEXT: ;;#ASMSTART
11909 ; GFX90A-NEXT: ; use s[8:15]
11910 ; GFX90A-NEXT: ;;#ASMEND
11911 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11913 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_2:
11915 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11916 ; GFX940-NEXT: ;;#ASMSTART
11917 ; GFX940-NEXT: ; def s[8:13]
11918 ; GFX940-NEXT: ;;#ASMEND
11919 ; GFX940-NEXT: ;;#ASMSTART
11920 ; GFX940-NEXT: ; def s[0:5]
11921 ; GFX940-NEXT: ;;#ASMEND
11922 ; GFX940-NEXT: s_mov_b32 s8, s12
11923 ; GFX940-NEXT: s_mov_b32 s9, s13
11924 ; GFX940-NEXT: s_mov_b32 s10, s12
11925 ; GFX940-NEXT: s_mov_b32 s11, s13
11926 ; GFX940-NEXT: s_mov_b32 s12, s2
11927 ; GFX940-NEXT: s_mov_b32 s13, s3
11928 ; GFX940-NEXT: s_mov_b32 s14, s4
11929 ; GFX940-NEXT: s_mov_b32 s15, s5
11930 ; GFX940-NEXT: ;;#ASMSTART
11931 ; GFX940-NEXT: ; use s[8:15]
11932 ; GFX940-NEXT: ;;#ASMEND
11933 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11934 %vec0 = call <3 x i64> asm "; def $0", "=s"()
11935 %vec1 = call <3 x i64> asm "; def $0", "=s"()
11936 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 2>
11937 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffles two <3 x i64> values produced by "=s" inline asm with mask
; <5, 5, 3, 2>: result lanes 0 and 1 take %vec1[2], lane 2 takes %vec1[0],
; and lane 3 takes %vec0[2]. The <4 x i64> result is consumed by inline asm
; constrained to s[8:15].
11941 define void @s_shuffle_v4i64_v3i64__5_5_3_2() {
11942 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_2:
11944 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11945 ; GFX900-NEXT: ;;#ASMSTART
11946 ; GFX900-NEXT: ; def s[12:17]
11947 ; GFX900-NEXT: ;;#ASMEND
11948 ; GFX900-NEXT: ;;#ASMSTART
11949 ; GFX900-NEXT: ; def s[20:25]
11950 ; GFX900-NEXT: ;;#ASMEND
11951 ; GFX900-NEXT: s_mov_b32 s8, s24
11952 ; GFX900-NEXT: s_mov_b32 s9, s25
11953 ; GFX900-NEXT: s_mov_b32 s10, s24
11954 ; GFX900-NEXT: s_mov_b32 s11, s25
11955 ; GFX900-NEXT: s_mov_b32 s12, s20
11956 ; GFX900-NEXT: s_mov_b32 s13, s21
11957 ; GFX900-NEXT: s_mov_b32 s14, s16
11958 ; GFX900-NEXT: s_mov_b32 s15, s17
11959 ; GFX900-NEXT: ;;#ASMSTART
11960 ; GFX900-NEXT: ; use s[8:15]
11961 ; GFX900-NEXT: ;;#ASMEND
11962 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11964 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_2:
11966 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11967 ; GFX90A-NEXT: ;;#ASMSTART
11968 ; GFX90A-NEXT: ; def s[12:17]
11969 ; GFX90A-NEXT: ;;#ASMEND
11970 ; GFX90A-NEXT: ;;#ASMSTART
11971 ; GFX90A-NEXT: ; def s[20:25]
11972 ; GFX90A-NEXT: ;;#ASMEND
11973 ; GFX90A-NEXT: s_mov_b32 s8, s24
11974 ; GFX90A-NEXT: s_mov_b32 s9, s25
11975 ; GFX90A-NEXT: s_mov_b32 s10, s24
11976 ; GFX90A-NEXT: s_mov_b32 s11, s25
11977 ; GFX90A-NEXT: s_mov_b32 s12, s20
11978 ; GFX90A-NEXT: s_mov_b32 s13, s21
11979 ; GFX90A-NEXT: s_mov_b32 s14, s16
11980 ; GFX90A-NEXT: s_mov_b32 s15, s17
11981 ; GFX90A-NEXT: ;;#ASMSTART
11982 ; GFX90A-NEXT: ; use s[8:15]
11983 ; GFX90A-NEXT: ;;#ASMEND
11984 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11986 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_2:
11988 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11989 ; GFX940-NEXT: ;;#ASMSTART
11990 ; GFX940-NEXT: ; def s[12:17]
11991 ; GFX940-NEXT: ;;#ASMEND
11992 ; GFX940-NEXT: ;;#ASMSTART
11993 ; GFX940-NEXT: ; def s[0:5]
11994 ; GFX940-NEXT: ;;#ASMEND
11995 ; GFX940-NEXT: s_mov_b32 s8, s16
11996 ; GFX940-NEXT: s_mov_b32 s9, s17
11997 ; GFX940-NEXT: s_mov_b32 s10, s16
11998 ; GFX940-NEXT: s_mov_b32 s11, s17
11999 ; GFX940-NEXT: s_mov_b32 s14, s4
12000 ; GFX940-NEXT: s_mov_b32 s15, s5
12001 ; GFX940-NEXT: ;;#ASMSTART
12002 ; GFX940-NEXT: ; use s[8:15]
12003 ; GFX940-NEXT: ;;#ASMEND
12004 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12005 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12006 %vec1 = call <3 x i64> asm "; def $0", "=s"()
12007 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 2>
12008 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffles two <3 x i64> values produced by "=s" inline asm with mask
; <5, 5, 4, 2>: result lanes 0 and 1 take %vec1[2], lane 2 takes %vec1[1],
; and lane 3 takes %vec0[2]. The <4 x i64> result is consumed by inline asm
; constrained to s[8:15].
12012 define void @s_shuffle_v4i64_v3i64__5_5_4_2() {
12013 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_2:
12015 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12016 ; GFX900-NEXT: ;;#ASMSTART
12017 ; GFX900-NEXT: ; def s[12:17]
12018 ; GFX900-NEXT: ;;#ASMEND
12019 ; GFX900-NEXT: ;;#ASMSTART
12020 ; GFX900-NEXT: ; def s[20:25]
12021 ; GFX900-NEXT: ;;#ASMEND
12022 ; GFX900-NEXT: s_mov_b32 s8, s24
12023 ; GFX900-NEXT: s_mov_b32 s9, s25
12024 ; GFX900-NEXT: s_mov_b32 s10, s24
12025 ; GFX900-NEXT: s_mov_b32 s11, s25
12026 ; GFX900-NEXT: s_mov_b32 s12, s22
12027 ; GFX900-NEXT: s_mov_b32 s13, s23
12028 ; GFX900-NEXT: s_mov_b32 s14, s16
12029 ; GFX900-NEXT: s_mov_b32 s15, s17
12030 ; GFX900-NEXT: ;;#ASMSTART
12031 ; GFX900-NEXT: ; use s[8:15]
12032 ; GFX900-NEXT: ;;#ASMEND
12033 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12035 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_2:
12037 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12038 ; GFX90A-NEXT: ;;#ASMSTART
12039 ; GFX90A-NEXT: ; def s[12:17]
12040 ; GFX90A-NEXT: ;;#ASMEND
12041 ; GFX90A-NEXT: ;;#ASMSTART
12042 ; GFX90A-NEXT: ; def s[20:25]
12043 ; GFX90A-NEXT: ;;#ASMEND
12044 ; GFX90A-NEXT: s_mov_b32 s8, s24
12045 ; GFX90A-NEXT: s_mov_b32 s9, s25
12046 ; GFX90A-NEXT: s_mov_b32 s10, s24
12047 ; GFX90A-NEXT: s_mov_b32 s11, s25
12048 ; GFX90A-NEXT: s_mov_b32 s12, s22
12049 ; GFX90A-NEXT: s_mov_b32 s13, s23
12050 ; GFX90A-NEXT: s_mov_b32 s14, s16
12051 ; GFX90A-NEXT: s_mov_b32 s15, s17
12052 ; GFX90A-NEXT: ;;#ASMSTART
12053 ; GFX90A-NEXT: ; use s[8:15]
12054 ; GFX90A-NEXT: ;;#ASMEND
12055 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12057 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_2:
12059 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12060 ; GFX940-NEXT: ;;#ASMSTART
12061 ; GFX940-NEXT: ; def s[12:17]
12062 ; GFX940-NEXT: ;;#ASMEND
12063 ; GFX940-NEXT: ;;#ASMSTART
12064 ; GFX940-NEXT: ; def s[0:5]
12065 ; GFX940-NEXT: ;;#ASMEND
12066 ; GFX940-NEXT: s_mov_b32 s8, s16
12067 ; GFX940-NEXT: s_mov_b32 s9, s17
12068 ; GFX940-NEXT: s_mov_b32 s10, s16
12069 ; GFX940-NEXT: s_mov_b32 s11, s17
12070 ; GFX940-NEXT: s_mov_b32 s12, s14
12071 ; GFX940-NEXT: s_mov_b32 s13, s15
12072 ; GFX940-NEXT: s_mov_b32 s14, s4
12073 ; GFX940-NEXT: s_mov_b32 s15, s5
12074 ; GFX940-NEXT: ;;#ASMSTART
12075 ; GFX940-NEXT: ; use s[8:15]
12076 ; GFX940-NEXT: ;;#ASMEND
12077 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12078 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12079 %vec1 = call <3 x i64> asm "; def $0", "=s"()
12080 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 2>
12081 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffles %vec0 with a poison second operand using mask <poison, 3, 3, 3>.
; Index 3 selects lane 0 of the second operand, which is poison here, so no
; result lane reads %vec0; the checked output contains only the "use" asm.
12085 define void @s_shuffle_v4i64_v3i64__u_3_3_3() {
12086 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_3_3_3:
12088 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12089 ; GFX9-NEXT: ;;#ASMSTART
12090 ; GFX9-NEXT: ; use s[8:15]
12091 ; GFX9-NEXT: ;;#ASMEND
12092 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12093 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12094 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
12095 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffles %vec0 with a poison second operand using mask <0, 3, 3, 3>:
; result lane 0 takes %vec0[0]; lanes 1-3 select lane 0 of the poison
; operand. The <4 x i64> result is consumed by inline asm constrained to
; s[8:15].
12099 define void @s_shuffle_v4i64_v3i64__0_3_3_3() {
12100 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_3_3_3:
12102 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12103 ; GFX900-NEXT: ;;#ASMSTART
12104 ; GFX900-NEXT: ; def s[8:13]
12105 ; GFX900-NEXT: ;;#ASMEND
12106 ; GFX900-NEXT: ;;#ASMSTART
12107 ; GFX900-NEXT: ; use s[8:15]
12108 ; GFX900-NEXT: ;;#ASMEND
12109 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12111 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_3_3_3:
12113 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12114 ; GFX90A-NEXT: ;;#ASMSTART
12115 ; GFX90A-NEXT: ; def s[8:13]
12116 ; GFX90A-NEXT: ;;#ASMEND
12117 ; GFX90A-NEXT: ;;#ASMSTART
12118 ; GFX90A-NEXT: ; use s[8:15]
12119 ; GFX90A-NEXT: ;;#ASMEND
12120 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12122 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__0_3_3_3:
12124 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12125 ; GFX940-NEXT: ;;#ASMSTART
12126 ; GFX940-NEXT: ; def s[8:13]
12127 ; GFX940-NEXT: ;;#ASMEND
12128 ; GFX940-NEXT: s_nop 0
12129 ; GFX940-NEXT: ;;#ASMSTART
12130 ; GFX940-NEXT: ; use s[8:15]
12131 ; GFX940-NEXT: ;;#ASMEND
12132 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12133 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12134 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
12135 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffles %vec0 with a poison second operand using mask <1, 3, 3, 3>:
; result lane 0 takes %vec0[1]; lanes 1-3 select lane 0 of the poison
; operand. The <4 x i64> result is consumed by inline asm constrained to
; s[8:15].
12139 define void @s_shuffle_v4i64_v3i64__1_3_3_3() {
12140 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3:
12142 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12143 ; GFX900-NEXT: ;;#ASMSTART
12144 ; GFX900-NEXT: ; def s[4:9]
12145 ; GFX900-NEXT: ;;#ASMEND
12146 ; GFX900-NEXT: s_mov_b32 s8, s6
12147 ; GFX900-NEXT: s_mov_b32 s9, s7
12148 ; GFX900-NEXT: ;;#ASMSTART
12149 ; GFX900-NEXT: ; use s[8:15]
12150 ; GFX900-NEXT: ;;#ASMEND
12151 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12153 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3:
12155 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12156 ; GFX90A-NEXT: ;;#ASMSTART
12157 ; GFX90A-NEXT: ; def s[4:9]
12158 ; GFX90A-NEXT: ;;#ASMEND
12159 ; GFX90A-NEXT: s_mov_b32 s8, s6
12160 ; GFX90A-NEXT: s_mov_b32 s9, s7
12161 ; GFX90A-NEXT: ;;#ASMSTART
12162 ; GFX90A-NEXT: ; use s[8:15]
12163 ; GFX90A-NEXT: ;;#ASMEND
12164 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12166 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3:
12168 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12169 ; GFX940-NEXT: ;;#ASMSTART
12170 ; GFX940-NEXT: ; def s[0:5]
12171 ; GFX940-NEXT: ;;#ASMEND
12172 ; GFX940-NEXT: s_mov_b32 s8, s2
12173 ; GFX940-NEXT: s_mov_b32 s9, s3
12174 ; GFX940-NEXT: ;;#ASMSTART
12175 ; GFX940-NEXT: ; use s[8:15]
12176 ; GFX940-NEXT: ;;#ASMEND
12177 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12178 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12179 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
12180 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffles %vec0 with a poison second operand using mask <2, 3, 3, 3>:
; result lane 0 takes %vec0[2]; lanes 1-3 select lane 0 of the poison
; operand. The <4 x i64> result is consumed by inline asm constrained to
; s[8:15].
12184 define void @s_shuffle_v4i64_v3i64__2_3_3_3() {
12185 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3:
12187 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12188 ; GFX900-NEXT: ;;#ASMSTART
12189 ; GFX900-NEXT: ; def s[4:9]
12190 ; GFX900-NEXT: ;;#ASMEND
12191 ; GFX900-NEXT: ;;#ASMSTART
12192 ; GFX900-NEXT: ; use s[8:15]
12193 ; GFX900-NEXT: ;;#ASMEND
12194 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12196 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3:
12198 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12199 ; GFX90A-NEXT: ;;#ASMSTART
12200 ; GFX90A-NEXT: ; def s[4:9]
12201 ; GFX90A-NEXT: ;;#ASMEND
12202 ; GFX90A-NEXT: ;;#ASMSTART
12203 ; GFX90A-NEXT: ; use s[8:15]
12204 ; GFX90A-NEXT: ;;#ASMEND
12205 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12207 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3:
12209 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12210 ; GFX940-NEXT: ;;#ASMSTART
12211 ; GFX940-NEXT: ; def s[0:5]
12212 ; GFX940-NEXT: ;;#ASMEND
12213 ; GFX940-NEXT: s_mov_b32 s8, s4
12214 ; GFX940-NEXT: s_mov_b32 s9, s5
12215 ; GFX940-NEXT: ;;#ASMSTART
12216 ; GFX940-NEXT: ; use s[8:15]
12217 ; GFX940-NEXT: ;;#ASMEND
12218 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12219 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12220 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
12221 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffles %vec0 with a poison second operand using mask <3, 3, 3, 3>.
; Every index selects lane 0 of the second operand, which is poison here, so
; no result lane reads %vec0; the checked output contains only the "use" asm.
12225 define void @s_shuffle_v4i64_v3i64__3_3_3_3() {
12226 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_3_3_3:
12228 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12229 ; GFX9-NEXT: ;;#ASMSTART
12230 ; GFX9-NEXT: ; use s[8:15]
12231 ; GFX9-NEXT: ;;#ASMEND
12232 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12233 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12234 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
12235 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffles two <3 x i64> values produced by "=s" inline asm with mask
; <4, 3, 3, 3>: result lane 0 takes %vec1[1] and lanes 1-3 take %vec1[0].
; Every index is >= 3, so no lane is selected from %vec0. The <4 x i64>
; result is consumed by inline asm constrained to s[8:15].
12239 define void @s_shuffle_v4i64_v3i64__4_3_3_3() {
12240 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_3_3_3:
12242 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12243 ; GFX900-NEXT: ;;#ASMSTART
12244 ; GFX900-NEXT: ; def s[4:9]
12245 ; GFX900-NEXT: ;;#ASMEND
12246 ; GFX900-NEXT: s_mov_b32 s8, s6
12247 ; GFX900-NEXT: s_mov_b32 s9, s7
12248 ; GFX900-NEXT: s_mov_b32 s10, s4
12249 ; GFX900-NEXT: s_mov_b32 s11, s5
12250 ; GFX900-NEXT: s_mov_b32 s12, s4
12251 ; GFX900-NEXT: s_mov_b32 s13, s5
12252 ; GFX900-NEXT: s_mov_b32 s14, s4
12253 ; GFX900-NEXT: s_mov_b32 s15, s5
12254 ; GFX900-NEXT: ;;#ASMSTART
12255 ; GFX900-NEXT: ; use s[8:15]
12256 ; GFX900-NEXT: ;;#ASMEND
12257 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12259 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_3_3_3:
12261 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12262 ; GFX90A-NEXT: ;;#ASMSTART
12263 ; GFX90A-NEXT: ; def s[4:9]
12264 ; GFX90A-NEXT: ;;#ASMEND
12265 ; GFX90A-NEXT: s_mov_b32 s8, s6
12266 ; GFX90A-NEXT: s_mov_b32 s9, s7
12267 ; GFX90A-NEXT: s_mov_b32 s10, s4
12268 ; GFX90A-NEXT: s_mov_b32 s11, s5
12269 ; GFX90A-NEXT: s_mov_b32 s12, s4
12270 ; GFX90A-NEXT: s_mov_b32 s13, s5
12271 ; GFX90A-NEXT: s_mov_b32 s14, s4
12272 ; GFX90A-NEXT: s_mov_b32 s15, s5
12273 ; GFX90A-NEXT: ;;#ASMSTART
12274 ; GFX90A-NEXT: ; use s[8:15]
12275 ; GFX90A-NEXT: ;;#ASMEND
12276 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12278 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_3_3_3:
12280 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12281 ; GFX940-NEXT: ;;#ASMSTART
12282 ; GFX940-NEXT: ; def s[0:5]
12283 ; GFX940-NEXT: ;;#ASMEND
12284 ; GFX940-NEXT: s_mov_b32 s8, s2
12285 ; GFX940-NEXT: s_mov_b32 s9, s3
12286 ; GFX940-NEXT: s_mov_b32 s10, s0
12287 ; GFX940-NEXT: s_mov_b32 s11, s1
12288 ; GFX940-NEXT: s_mov_b32 s12, s0
12289 ; GFX940-NEXT: s_mov_b32 s13, s1
12290 ; GFX940-NEXT: s_mov_b32 s14, s0
12291 ; GFX940-NEXT: s_mov_b32 s15, s1
12292 ; GFX940-NEXT: ;;#ASMSTART
12293 ; GFX940-NEXT: ; use s[8:15]
12294 ; GFX940-NEXT: ;;#ASMEND
12295 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12296 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12297 %vec1 = call <3 x i64> asm "; def $0", "=s"()
12298 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
12299 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffles two <3 x i64> values produced by "=s" inline asm with mask
; <5, 3, 3, 3>: result lane 0 takes %vec1[2] and lanes 1-3 take %vec1[0].
; Every index is >= 3, so no lane is selected from %vec0. The <4 x i64>
; result is consumed by inline asm constrained to s[8:15].
12303 define void @s_shuffle_v4i64_v3i64__5_3_3_3() {
12304 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_3_3:
12306 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12307 ; GFX900-NEXT: ;;#ASMSTART
12308 ; GFX900-NEXT: ; def s[4:9]
12309 ; GFX900-NEXT: ;;#ASMEND
12310 ; GFX900-NEXT: s_mov_b32 s10, s4
12311 ; GFX900-NEXT: s_mov_b32 s11, s5
12312 ; GFX900-NEXT: s_mov_b32 s12, s4
12313 ; GFX900-NEXT: s_mov_b32 s13, s5
12314 ; GFX900-NEXT: s_mov_b32 s14, s4
12315 ; GFX900-NEXT: s_mov_b32 s15, s5
12316 ; GFX900-NEXT: ;;#ASMSTART
12317 ; GFX900-NEXT: ; use s[8:15]
12318 ; GFX900-NEXT: ;;#ASMEND
12319 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12321 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_3_3:
12323 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12324 ; GFX90A-NEXT: ;;#ASMSTART
12325 ; GFX90A-NEXT: ; def s[4:9]
12326 ; GFX90A-NEXT: ;;#ASMEND
12327 ; GFX90A-NEXT: s_mov_b32 s10, s4
12328 ; GFX90A-NEXT: s_mov_b32 s11, s5
12329 ; GFX90A-NEXT: s_mov_b32 s12, s4
12330 ; GFX90A-NEXT: s_mov_b32 s13, s5
12331 ; GFX90A-NEXT: s_mov_b32 s14, s4
12332 ; GFX90A-NEXT: s_mov_b32 s15, s5
12333 ; GFX90A-NEXT: ;;#ASMSTART
12334 ; GFX90A-NEXT: ; use s[8:15]
12335 ; GFX90A-NEXT: ;;#ASMEND
12336 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12338 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_3_3:
12340 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12341 ; GFX940-NEXT: ;;#ASMSTART
12342 ; GFX940-NEXT: ; def s[0:5]
12343 ; GFX940-NEXT: ;;#ASMEND
12344 ; GFX940-NEXT: s_mov_b32 s8, s4
12345 ; GFX940-NEXT: s_mov_b32 s9, s5
12346 ; GFX940-NEXT: s_mov_b32 s10, s0
12347 ; GFX940-NEXT: s_mov_b32 s11, s1
12348 ; GFX940-NEXT: s_mov_b32 s12, s0
12349 ; GFX940-NEXT: s_mov_b32 s13, s1
12350 ; GFX940-NEXT: s_mov_b32 s14, s0
12351 ; GFX940-NEXT: s_mov_b32 s15, s1
12352 ; GFX940-NEXT: ;;#ASMSTART
12353 ; GFX940-NEXT: ; use s[8:15]
12354 ; GFX940-NEXT: ;;#ASMEND
12355 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12356 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12357 %vec1 = call <3 x i64> asm "; def $0", "=s"()
12358 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
12359 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffles two <3 x i64> values produced by "=s" inline asm with mask
; <5, poison, 3, 3>: result lane 0 takes %vec1[2], lane 1 is poison, and
; lanes 2-3 take %vec1[0]. No lane is selected from %vec0. The <4 x i64>
; result is consumed by inline asm constrained to s[8:15].
12363 define void @s_shuffle_v4i64_v3i64__5_u_3_3() {
12364 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3:
12366 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12367 ; GFX900-NEXT: ;;#ASMSTART
12368 ; GFX900-NEXT: ; def s[4:9]
12369 ; GFX900-NEXT: ;;#ASMEND
12370 ; GFX900-NEXT: s_mov_b32 s12, s4
12371 ; GFX900-NEXT: s_mov_b32 s13, s5
12372 ; GFX900-NEXT: s_mov_b32 s14, s4
12373 ; GFX900-NEXT: s_mov_b32 s15, s5
12374 ; GFX900-NEXT: ;;#ASMSTART
12375 ; GFX900-NEXT: ; use s[8:15]
12376 ; GFX900-NEXT: ;;#ASMEND
12377 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12379 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3:
12381 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12382 ; GFX90A-NEXT: ;;#ASMSTART
12383 ; GFX90A-NEXT: ; def s[4:9]
12384 ; GFX90A-NEXT: ;;#ASMEND
12385 ; GFX90A-NEXT: s_mov_b32 s12, s4
12386 ; GFX90A-NEXT: s_mov_b32 s13, s5
12387 ; GFX90A-NEXT: s_mov_b32 s14, s4
12388 ; GFX90A-NEXT: s_mov_b32 s15, s5
12389 ; GFX90A-NEXT: ;;#ASMSTART
12390 ; GFX90A-NEXT: ; use s[8:15]
12391 ; GFX90A-NEXT: ;;#ASMEND
12392 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12394 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3:
12396 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12397 ; GFX940-NEXT: ;;#ASMSTART
12398 ; GFX940-NEXT: ; def s[0:5]
12399 ; GFX940-NEXT: ;;#ASMEND
12400 ; GFX940-NEXT: s_mov_b32 s8, s4
12401 ; GFX940-NEXT: s_mov_b32 s9, s5
12402 ; GFX940-NEXT: s_mov_b32 s12, s0
12403 ; GFX940-NEXT: s_mov_b32 s13, s1
12404 ; GFX940-NEXT: s_mov_b32 s14, s0
12405 ; GFX940-NEXT: s_mov_b32 s15, s1
12406 ; GFX940-NEXT: ;;#ASMSTART
12407 ; GFX940-NEXT: ; use s[8:15]
12408 ; GFX940-NEXT: ;;#ASMEND
12409 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12410 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12411 %vec1 = call <3 x i64> asm "; def $0", "=s"()
12412 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 3, i32 3>
12413 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffles two <3 x i64> values produced by "=s" inline asm with mask
; <5, 0, 3, 3>: result lane 0 takes %vec1[2], lane 1 takes %vec0[0], and
; lanes 2-3 take %vec1[0]. The <4 x i64> result is consumed by inline asm
; constrained to s[8:15].
12417 define void @s_shuffle_v4i64_v3i64__5_0_3_3() {
12418 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_3_3:
12420 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12421 ; GFX900-NEXT: ;;#ASMSTART
12422 ; GFX900-NEXT: ; def s[4:9]
12423 ; GFX900-NEXT: ;;#ASMEND
12424 ; GFX900-NEXT: ;;#ASMSTART
12425 ; GFX900-NEXT: ; def s[16:21]
12426 ; GFX900-NEXT: ;;#ASMEND
12427 ; GFX900-NEXT: s_mov_b32 s8, s20
12428 ; GFX900-NEXT: s_mov_b32 s9, s21
12429 ; GFX900-NEXT: s_mov_b32 s10, s4
12430 ; GFX900-NEXT: s_mov_b32 s11, s5
12431 ; GFX900-NEXT: s_mov_b32 s12, s16
12432 ; GFX900-NEXT: s_mov_b32 s13, s17
12433 ; GFX900-NEXT: s_mov_b32 s14, s16
12434 ; GFX900-NEXT: s_mov_b32 s15, s17
12435 ; GFX900-NEXT: ;;#ASMSTART
12436 ; GFX900-NEXT: ; use s[8:15]
12437 ; GFX900-NEXT: ;;#ASMEND
12438 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12440 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_3_3:
12442 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12443 ; GFX90A-NEXT: ;;#ASMSTART
12444 ; GFX90A-NEXT: ; def s[4:9]
12445 ; GFX90A-NEXT: ;;#ASMEND
12446 ; GFX90A-NEXT: ;;#ASMSTART
12447 ; GFX90A-NEXT: ; def s[16:21]
12448 ; GFX90A-NEXT: ;;#ASMEND
12449 ; GFX90A-NEXT: s_mov_b32 s8, s20
12450 ; GFX90A-NEXT: s_mov_b32 s9, s21
12451 ; GFX90A-NEXT: s_mov_b32 s10, s4
12452 ; GFX90A-NEXT: s_mov_b32 s11, s5
12453 ; GFX90A-NEXT: s_mov_b32 s12, s16
12454 ; GFX90A-NEXT: s_mov_b32 s13, s17
12455 ; GFX90A-NEXT: s_mov_b32 s14, s16
12456 ; GFX90A-NEXT: s_mov_b32 s15, s17
12457 ; GFX90A-NEXT: ;;#ASMSTART
12458 ; GFX90A-NEXT: ; use s[8:15]
12459 ; GFX90A-NEXT: ;;#ASMEND
12460 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12462 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_3_3:
12464 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12465 ; GFX940-NEXT: ;;#ASMSTART
12466 ; GFX940-NEXT: ; def s[0:5]
12467 ; GFX940-NEXT: ;;#ASMEND
12468 ; GFX940-NEXT: s_mov_b32 s10, s0
12469 ; GFX940-NEXT: ;;#ASMSTART
12470 ; GFX940-NEXT: ; def s[4:9]
12471 ; GFX940-NEXT: ;;#ASMEND
12472 ; GFX940-NEXT: s_mov_b32 s11, s1
12473 ; GFX940-NEXT: s_mov_b32 s12, s4
12474 ; GFX940-NEXT: s_mov_b32 s13, s5
12475 ; GFX940-NEXT: s_mov_b32 s14, s4
12476 ; GFX940-NEXT: s_mov_b32 s15, s5
12477 ; GFX940-NEXT: ;;#ASMSTART
12478 ; GFX940-NEXT: ; use s[8:15]
12479 ; GFX940-NEXT: ;;#ASMEND
12480 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12481 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12482 %vec1 = call <3 x i64> asm "; def $0", "=s"()
12483 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 3, i32 3>
12484 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffles two <3 x i64> values produced by "=s" inline asm with mask
; <5, 1, 3, 3>: result lane 0 takes %vec1[2], lane 1 takes %vec0[1], and
; lanes 2-3 take %vec1[0]. The <4 x i64> result is consumed by inline asm
; constrained to s[8:15].
12488 define void @s_shuffle_v4i64_v3i64__5_1_3_3() {
12489 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_3_3:
12491 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12492 ; GFX900-NEXT: ;;#ASMSTART
12493 ; GFX900-NEXT: ; def s[8:13]
12494 ; GFX900-NEXT: ;;#ASMEND
12495 ; GFX900-NEXT: ;;#ASMSTART
12496 ; GFX900-NEXT: ; def s[4:9]
12497 ; GFX900-NEXT: ;;#ASMEND
12498 ; GFX900-NEXT: s_mov_b32 s12, s4
12499 ; GFX900-NEXT: s_mov_b32 s13, s5
12500 ; GFX900-NEXT: s_mov_b32 s14, s4
12501 ; GFX900-NEXT: s_mov_b32 s15, s5
12502 ; GFX900-NEXT: ;;#ASMSTART
12503 ; GFX900-NEXT: ; use s[8:15]
12504 ; GFX900-NEXT: ;;#ASMEND
12505 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12507 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_3_3:
12509 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12510 ; GFX90A-NEXT: ;;#ASMSTART
12511 ; GFX90A-NEXT: ; def s[8:13]
12512 ; GFX90A-NEXT: ;;#ASMEND
12513 ; GFX90A-NEXT: ;;#ASMSTART
12514 ; GFX90A-NEXT: ; def s[4:9]
12515 ; GFX90A-NEXT: ;;#ASMEND
12516 ; GFX90A-NEXT: s_mov_b32 s12, s4
12517 ; GFX90A-NEXT: s_mov_b32 s13, s5
12518 ; GFX90A-NEXT: s_mov_b32 s14, s4
12519 ; GFX90A-NEXT: s_mov_b32 s15, s5
12520 ; GFX90A-NEXT: ;;#ASMSTART
12521 ; GFX90A-NEXT: ; use s[8:15]
12522 ; GFX90A-NEXT: ;;#ASMEND
12523 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12525 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_3_3:
12527 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12528 ; GFX940-NEXT: ;;#ASMSTART
12529 ; GFX940-NEXT: ; def s[8:13]
12530 ; GFX940-NEXT: ;;#ASMEND
12531 ; GFX940-NEXT: ;;#ASMSTART
12532 ; GFX940-NEXT: ; def s[0:5]
12533 ; GFX940-NEXT: ;;#ASMEND
12534 ; GFX940-NEXT: s_mov_b32 s8, s4
12535 ; GFX940-NEXT: s_mov_b32 s9, s5
12536 ; GFX940-NEXT: s_mov_b32 s12, s0
12537 ; GFX940-NEXT: s_mov_b32 s13, s1
12538 ; GFX940-NEXT: s_mov_b32 s14, s0
12539 ; GFX940-NEXT: s_mov_b32 s15, s1
12540 ; GFX940-NEXT: ;;#ASMSTART
12541 ; GFX940-NEXT: ; use s[8:15]
12542 ; GFX940-NEXT: ;;#ASMEND
12543 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12544 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12545 %vec1 = call <3 x i64> asm "; def $0", "=s"()
12546 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 3, i32 3>
12547 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffles two <3 x i64> values produced by "=s" inline asm with mask
; <5, 2, 3, 3>: result lane 0 takes %vec1[2], lane 1 takes %vec0[2], and
; lanes 2-3 take %vec1[0]. The <4 x i64> result is consumed by inline asm
; constrained to s[8:15].
12551 define void @s_shuffle_v4i64_v3i64__5_2_3_3() {
12552 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_3_3:
12554 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12555 ; GFX900-NEXT: ;;#ASMSTART
12556 ; GFX900-NEXT: ; def s[8:13]
12557 ; GFX900-NEXT: ;;#ASMEND
12558 ; GFX900-NEXT: ;;#ASMSTART
12559 ; GFX900-NEXT: ; def s[4:9]
12560 ; GFX900-NEXT: ;;#ASMEND
12561 ; GFX900-NEXT: s_mov_b32 s10, s12
12562 ; GFX900-NEXT: s_mov_b32 s11, s13
12563 ; GFX900-NEXT: s_mov_b32 s12, s4
12564 ; GFX900-NEXT: s_mov_b32 s13, s5
12565 ; GFX900-NEXT: s_mov_b32 s14, s4
12566 ; GFX900-NEXT: s_mov_b32 s15, s5
12567 ; GFX900-NEXT: ;;#ASMSTART
12568 ; GFX900-NEXT: ; use s[8:15]
12569 ; GFX900-NEXT: ;;#ASMEND
12570 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12572 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_3_3:
12574 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12575 ; GFX90A-NEXT: ;;#ASMSTART
12576 ; GFX90A-NEXT: ; def s[8:13]
12577 ; GFX90A-NEXT: ;;#ASMEND
12578 ; GFX90A-NEXT: ;;#ASMSTART
12579 ; GFX90A-NEXT: ; def s[4:9]
12580 ; GFX90A-NEXT: ;;#ASMEND
12581 ; GFX90A-NEXT: s_mov_b32 s10, s12
12582 ; GFX90A-NEXT: s_mov_b32 s11, s13
12583 ; GFX90A-NEXT: s_mov_b32 s12, s4
12584 ; GFX90A-NEXT: s_mov_b32 s13, s5
12585 ; GFX90A-NEXT: s_mov_b32 s14, s4
12586 ; GFX90A-NEXT: s_mov_b32 s15, s5
12587 ; GFX90A-NEXT: ;;#ASMSTART
12588 ; GFX90A-NEXT: ; use s[8:15]
12589 ; GFX90A-NEXT: ;;#ASMEND
12590 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12592 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_3_3:
12594 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12595 ; GFX940-NEXT: ;;#ASMSTART
12596 ; GFX940-NEXT: ; def s[0:5]
12597 ; GFX940-NEXT: ;;#ASMEND
12598 ; GFX940-NEXT: ;;#ASMSTART
12599 ; GFX940-NEXT: ; def s[16:21]
12600 ; GFX940-NEXT: ;;#ASMEND
12601 ; GFX940-NEXT: s_mov_b32 s8, s20
12602 ; GFX940-NEXT: s_mov_b32 s9, s21
12603 ; GFX940-NEXT: s_mov_b32 s10, s4
12604 ; GFX940-NEXT: s_mov_b32 s11, s5
12605 ; GFX940-NEXT: s_mov_b32 s12, s16
12606 ; GFX940-NEXT: s_mov_b32 s13, s17
12607 ; GFX940-NEXT: s_mov_b32 s14, s16
12608 ; GFX940-NEXT: s_mov_b32 s15, s17
12609 ; GFX940-NEXT: ;;#ASMSTART
12610 ; GFX940-NEXT: ; use s[8:15]
12611 ; GFX940-NEXT: ;;#ASMEND
12612 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12613 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12614 %vec1 = call <3 x i64> asm "; def $0", "=s"()
12615 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 3, i32 3>
12616 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Shuffles two <3 x i64> values produced by "=s" inline asm with mask
; <5, 4, 3, 3>: result lane 0 takes %vec1[2], lane 1 takes %vec1[1], and
; lanes 2-3 take %vec1[0]. Every index is >= 3, so no lane is selected from
; %vec0. The <4 x i64> result is consumed by inline asm constrained to
; s[8:15].
12620 define void @s_shuffle_v4i64_v3i64__5_4_3_3() {
12621 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_3_3:
12623 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12624 ; GFX900-NEXT: ;;#ASMSTART
12625 ; GFX900-NEXT: ; def s[4:9]
12626 ; GFX900-NEXT: ;;#ASMEND
12627 ; GFX900-NEXT: s_mov_b32 s10, s6
12628 ; GFX900-NEXT: s_mov_b32 s11, s7
12629 ; GFX900-NEXT: s_mov_b32 s12, s4
12630 ; GFX900-NEXT: s_mov_b32 s13, s5
12631 ; GFX900-NEXT: s_mov_b32 s14, s4
12632 ; GFX900-NEXT: s_mov_b32 s15, s5
12633 ; GFX900-NEXT: ;;#ASMSTART
12634 ; GFX900-NEXT: ; use s[8:15]
12635 ; GFX900-NEXT: ;;#ASMEND
12636 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12638 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_3_3:
12640 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12641 ; GFX90A-NEXT: ;;#ASMSTART
12642 ; GFX90A-NEXT: ; def s[4:9]
12643 ; GFX90A-NEXT: ;;#ASMEND
12644 ; GFX90A-NEXT: s_mov_b32 s10, s6
12645 ; GFX90A-NEXT: s_mov_b32 s11, s7
12646 ; GFX90A-NEXT: s_mov_b32 s12, s4
12647 ; GFX90A-NEXT: s_mov_b32 s13, s5
12648 ; GFX90A-NEXT: s_mov_b32 s14, s4
12649 ; GFX90A-NEXT: s_mov_b32 s15, s5
12650 ; GFX90A-NEXT: ;;#ASMSTART
12651 ; GFX90A-NEXT: ; use s[8:15]
12652 ; GFX90A-NEXT: ;;#ASMEND
12653 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12655 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_4_3_3:
12657 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12658 ; GFX940-NEXT: ;;#ASMSTART
12659 ; GFX940-NEXT: ; def s[0:5]
12660 ; GFX940-NEXT: ;;#ASMEND
12661 ; GFX940-NEXT: s_mov_b32 s8, s4
12662 ; GFX940-NEXT: s_mov_b32 s9, s5
12663 ; GFX940-NEXT: s_mov_b32 s10, s2
12664 ; GFX940-NEXT: s_mov_b32 s11, s3
12665 ; GFX940-NEXT: s_mov_b32 s12, s0
12666 ; GFX940-NEXT: s_mov_b32 s13, s1
12667 ; GFX940-NEXT: s_mov_b32 s14, s0
12668 ; GFX940-NEXT: s_mov_b32 s15, s1
12669 ; GFX940-NEXT: ;;#ASMSTART
12670 ; GFX940-NEXT: ; use s[8:15]
12671 ; GFX940-NEXT: ;;#ASMEND
12672 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12673 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12674 %vec1 = call <3 x i64> asm "; def $0", "=s"()
12675 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 3, i32 3>
12676 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64> using
; mask <5, 5, 3, 3>, i.e. { %vec1[2], %vec1[2], %vec1[0], %vec1[0] }.
; No lane reads %vec0, and the expected output contains a single "; def".
; The shuffled value is handed to inline asm constrained to s[8:15].
12680 define void @s_shuffle_v4i64_v3i64__5_5_3_3() {
12681 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_3:
12683 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12684 ; GFX900-NEXT: ;;#ASMSTART
12685 ; GFX900-NEXT: ; def s[16:21]
12686 ; GFX900-NEXT: ;;#ASMEND
12687 ; GFX900-NEXT: s_mov_b32 s8, s20
12688 ; GFX900-NEXT: s_mov_b32 s9, s21
12689 ; GFX900-NEXT: s_mov_b32 s10, s20
12690 ; GFX900-NEXT: s_mov_b32 s11, s21
12691 ; GFX900-NEXT: s_mov_b32 s12, s16
12692 ; GFX900-NEXT: s_mov_b32 s13, s17
12693 ; GFX900-NEXT: s_mov_b32 s14, s16
12694 ; GFX900-NEXT: s_mov_b32 s15, s17
12695 ; GFX900-NEXT: ;;#ASMSTART
12696 ; GFX900-NEXT: ; use s[8:15]
12697 ; GFX900-NEXT: ;;#ASMEND
12698 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12700 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_3:
12702 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12703 ; GFX90A-NEXT: ;;#ASMSTART
12704 ; GFX90A-NEXT: ; def s[16:21]
12705 ; GFX90A-NEXT: ;;#ASMEND
12706 ; GFX90A-NEXT: s_mov_b32 s8, s20
12707 ; GFX90A-NEXT: s_mov_b32 s9, s21
12708 ; GFX90A-NEXT: s_mov_b32 s10, s20
12709 ; GFX90A-NEXT: s_mov_b32 s11, s21
12710 ; GFX90A-NEXT: s_mov_b32 s12, s16
12711 ; GFX90A-NEXT: s_mov_b32 s13, s17
12712 ; GFX90A-NEXT: s_mov_b32 s14, s16
12713 ; GFX90A-NEXT: s_mov_b32 s15, s17
12714 ; GFX90A-NEXT: ;;#ASMSTART
12715 ; GFX90A-NEXT: ; use s[8:15]
12716 ; GFX90A-NEXT: ;;#ASMEND
12717 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12719 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_3:
12721 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12722 ; GFX940-NEXT: ;;#ASMSTART
12723 ; GFX940-NEXT: ; def s[0:5]
12724 ; GFX940-NEXT: ;;#ASMEND
12725 ; GFX940-NEXT: s_mov_b32 s8, s4
12726 ; GFX940-NEXT: s_mov_b32 s9, s5
12727 ; GFX940-NEXT: s_mov_b32 s10, s4
12728 ; GFX940-NEXT: s_mov_b32 s11, s5
12729 ; GFX940-NEXT: s_mov_b32 s12, s0
12730 ; GFX940-NEXT: s_mov_b32 s13, s1
12731 ; GFX940-NEXT: s_mov_b32 s14, s0
12732 ; GFX940-NEXT: s_mov_b32 s15, s1
12733 ; GFX940-NEXT: ;;#ASMSTART
12734 ; GFX940-NEXT: ; use s[8:15]
12735 ; GFX940-NEXT: ;;#ASMEND
12736 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: mask indices 0-2 select from %vec0 and 3-5 from %vec1.
12737 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12738 %vec1 = call <3 x i64> asm "; def $0", "=s"()
12739 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 3>
12740 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64> using
; mask <5, 5, poison, 3>, i.e. { %vec1[2], %vec1[2], poison, %vec1[0] }.
; No lane reads %vec0, and the expected output contains a single "; def".
; No move into s12/s13 is expected for the poison lane.
; The shuffled value is handed to inline asm constrained to s[8:15].
12744 define void @s_shuffle_v4i64_v3i64__5_5_u_3() {
12745 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3:
12747 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12748 ; GFX900-NEXT: ;;#ASMSTART
12749 ; GFX900-NEXT: ; def s[12:17]
12750 ; GFX900-NEXT: ;;#ASMEND
12751 ; GFX900-NEXT: s_mov_b32 s8, s16
12752 ; GFX900-NEXT: s_mov_b32 s9, s17
12753 ; GFX900-NEXT: s_mov_b32 s10, s16
12754 ; GFX900-NEXT: s_mov_b32 s11, s17
12755 ; GFX900-NEXT: s_mov_b32 s14, s12
12756 ; GFX900-NEXT: s_mov_b32 s15, s13
12757 ; GFX900-NEXT: ;;#ASMSTART
12758 ; GFX900-NEXT: ; use s[8:15]
12759 ; GFX900-NEXT: ;;#ASMEND
12760 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12762 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3:
12764 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12765 ; GFX90A-NEXT: ;;#ASMSTART
12766 ; GFX90A-NEXT: ; def s[12:17]
12767 ; GFX90A-NEXT: ;;#ASMEND
12768 ; GFX90A-NEXT: s_mov_b32 s8, s16
12769 ; GFX90A-NEXT: s_mov_b32 s9, s17
12770 ; GFX90A-NEXT: s_mov_b32 s10, s16
12771 ; GFX90A-NEXT: s_mov_b32 s11, s17
12772 ; GFX90A-NEXT: s_mov_b32 s14, s12
12773 ; GFX90A-NEXT: s_mov_b32 s15, s13
12774 ; GFX90A-NEXT: ;;#ASMSTART
12775 ; GFX90A-NEXT: ; use s[8:15]
12776 ; GFX90A-NEXT: ;;#ASMEND
12777 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12779 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3:
12781 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12782 ; GFX940-NEXT: ;;#ASMSTART
12783 ; GFX940-NEXT: ; def s[0:5]
12784 ; GFX940-NEXT: ;;#ASMEND
12785 ; GFX940-NEXT: s_mov_b32 s8, s4
12786 ; GFX940-NEXT: s_mov_b32 s9, s5
12787 ; GFX940-NEXT: s_mov_b32 s10, s4
12788 ; GFX940-NEXT: s_mov_b32 s11, s5
12789 ; GFX940-NEXT: s_mov_b32 s14, s0
12790 ; GFX940-NEXT: s_mov_b32 s15, s1
12791 ; GFX940-NEXT: ;;#ASMSTART
12792 ; GFX940-NEXT: ; use s[8:15]
12793 ; GFX940-NEXT: ;;#ASMEND
12794 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: mask indices 0-2 select from %vec0 and 3-5 from %vec1.
12795 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12796 %vec1 = call <3 x i64> asm "; def $0", "=s"()
12797 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 3>
12798 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64> using
; mask <5, 5, 0, 3>, i.e. { %vec1[2], %vec1[2], %vec0[0], %vec1[0] }.
; Both inputs are read, so the expected output contains two "; def" blocks.
; The shuffled value is handed to inline asm constrained to s[8:15].
12802 define void @s_shuffle_v4i64_v3i64__5_5_0_3() {
12803 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_3:
12805 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12806 ; GFX900-NEXT: ;;#ASMSTART
12807 ; GFX900-NEXT: ; def s[4:9]
12808 ; GFX900-NEXT: ;;#ASMEND
12809 ; GFX900-NEXT: ;;#ASMSTART
12810 ; GFX900-NEXT: ; def s[16:21]
12811 ; GFX900-NEXT: ;;#ASMEND
12812 ; GFX900-NEXT: s_mov_b32 s8, s20
12813 ; GFX900-NEXT: s_mov_b32 s9, s21
12814 ; GFX900-NEXT: s_mov_b32 s10, s20
12815 ; GFX900-NEXT: s_mov_b32 s11, s21
12816 ; GFX900-NEXT: s_mov_b32 s12, s4
12817 ; GFX900-NEXT: s_mov_b32 s13, s5
12818 ; GFX900-NEXT: s_mov_b32 s14, s16
12819 ; GFX900-NEXT: s_mov_b32 s15, s17
12820 ; GFX900-NEXT: ;;#ASMSTART
12821 ; GFX900-NEXT: ; use s[8:15]
12822 ; GFX900-NEXT: ;;#ASMEND
12823 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12825 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_3:
12827 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12828 ; GFX90A-NEXT: ;;#ASMSTART
12829 ; GFX90A-NEXT: ; def s[4:9]
12830 ; GFX90A-NEXT: ;;#ASMEND
12831 ; GFX90A-NEXT: ;;#ASMSTART
12832 ; GFX90A-NEXT: ; def s[16:21]
12833 ; GFX90A-NEXT: ;;#ASMEND
12834 ; GFX90A-NEXT: s_mov_b32 s8, s20
12835 ; GFX90A-NEXT: s_mov_b32 s9, s21
12836 ; GFX90A-NEXT: s_mov_b32 s10, s20
12837 ; GFX90A-NEXT: s_mov_b32 s11, s21
12838 ; GFX90A-NEXT: s_mov_b32 s12, s4
12839 ; GFX90A-NEXT: s_mov_b32 s13, s5
12840 ; GFX90A-NEXT: s_mov_b32 s14, s16
12841 ; GFX90A-NEXT: s_mov_b32 s15, s17
12842 ; GFX90A-NEXT: ;;#ASMSTART
12843 ; GFX90A-NEXT: ; use s[8:15]
12844 ; GFX90A-NEXT: ;;#ASMEND
12845 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12847 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_3:
12849 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12850 ; GFX940-NEXT: ;;#ASMSTART
12851 ; GFX940-NEXT: ; def s[0:5]
12852 ; GFX940-NEXT: ;;#ASMEND
12853 ; GFX940-NEXT: ;;#ASMSTART
12854 ; GFX940-NEXT: ; def s[16:21]
12855 ; GFX940-NEXT: ;;#ASMEND
12856 ; GFX940-NEXT: s_mov_b32 s8, s20
12857 ; GFX940-NEXT: s_mov_b32 s9, s21
12858 ; GFX940-NEXT: s_mov_b32 s10, s20
12859 ; GFX940-NEXT: s_mov_b32 s11, s21
12860 ; GFX940-NEXT: s_mov_b32 s12, s0
12861 ; GFX940-NEXT: s_mov_b32 s13, s1
12862 ; GFX940-NEXT: s_mov_b32 s14, s16
12863 ; GFX940-NEXT: s_mov_b32 s15, s17
12864 ; GFX940-NEXT: ;;#ASMSTART
12865 ; GFX940-NEXT: ; use s[8:15]
12866 ; GFX940-NEXT: ;;#ASMEND
12867 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: mask indices 0-2 select from %vec0 and 3-5 from %vec1.
12868 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12869 %vec1 = call <3 x i64> asm "; def $0", "=s"()
12870 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 3>
12871 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64> using
; mask <5, 5, 1, 3>, i.e. { %vec1[2], %vec1[2], %vec0[1], %vec1[0] }.
; Both inputs are read, so the expected output contains two "; def" blocks.
; The shuffled value is handed to inline asm constrained to s[8:15].
12875 define void @s_shuffle_v4i64_v3i64__5_5_1_3() {
12876 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_3:
12878 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12879 ; GFX900-NEXT: ;;#ASMSTART
12880 ; GFX900-NEXT: ; def s[4:9]
12881 ; GFX900-NEXT: ;;#ASMEND
12882 ; GFX900-NEXT: ;;#ASMSTART
12883 ; GFX900-NEXT: ; def s[16:21]
12884 ; GFX900-NEXT: ;;#ASMEND
12885 ; GFX900-NEXT: s_mov_b32 s8, s20
12886 ; GFX900-NEXT: s_mov_b32 s9, s21
12887 ; GFX900-NEXT: s_mov_b32 s10, s20
12888 ; GFX900-NEXT: s_mov_b32 s11, s21
12889 ; GFX900-NEXT: s_mov_b32 s12, s6
12890 ; GFX900-NEXT: s_mov_b32 s13, s7
12891 ; GFX900-NEXT: s_mov_b32 s14, s16
12892 ; GFX900-NEXT: s_mov_b32 s15, s17
12893 ; GFX900-NEXT: ;;#ASMSTART
12894 ; GFX900-NEXT: ; use s[8:15]
12895 ; GFX900-NEXT: ;;#ASMEND
12896 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12898 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_3:
12900 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12901 ; GFX90A-NEXT: ;;#ASMSTART
12902 ; GFX90A-NEXT: ; def s[4:9]
12903 ; GFX90A-NEXT: ;;#ASMEND
12904 ; GFX90A-NEXT: ;;#ASMSTART
12905 ; GFX90A-NEXT: ; def s[16:21]
12906 ; GFX90A-NEXT: ;;#ASMEND
12907 ; GFX90A-NEXT: s_mov_b32 s8, s20
12908 ; GFX90A-NEXT: s_mov_b32 s9, s21
12909 ; GFX90A-NEXT: s_mov_b32 s10, s20
12910 ; GFX90A-NEXT: s_mov_b32 s11, s21
12911 ; GFX90A-NEXT: s_mov_b32 s12, s6
12912 ; GFX90A-NEXT: s_mov_b32 s13, s7
12913 ; GFX90A-NEXT: s_mov_b32 s14, s16
12914 ; GFX90A-NEXT: s_mov_b32 s15, s17
12915 ; GFX90A-NEXT: ;;#ASMSTART
12916 ; GFX90A-NEXT: ; use s[8:15]
12917 ; GFX90A-NEXT: ;;#ASMEND
12918 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12920 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_3:
12922 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12923 ; GFX940-NEXT: ;;#ASMSTART
12924 ; GFX940-NEXT: ; def s[0:5]
12925 ; GFX940-NEXT: ;;#ASMEND
12926 ; GFX940-NEXT: ;;#ASMSTART
12927 ; GFX940-NEXT: ; def s[16:21]
12928 ; GFX940-NEXT: ;;#ASMEND
12929 ; GFX940-NEXT: s_mov_b32 s8, s20
12930 ; GFX940-NEXT: s_mov_b32 s9, s21
12931 ; GFX940-NEXT: s_mov_b32 s10, s20
12932 ; GFX940-NEXT: s_mov_b32 s11, s21
12933 ; GFX940-NEXT: s_mov_b32 s12, s2
12934 ; GFX940-NEXT: s_mov_b32 s13, s3
12935 ; GFX940-NEXT: s_mov_b32 s14, s16
12936 ; GFX940-NEXT: s_mov_b32 s15, s17
12937 ; GFX940-NEXT: ;;#ASMSTART
12938 ; GFX940-NEXT: ; use s[8:15]
12939 ; GFX940-NEXT: ;;#ASMEND
12940 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: mask indices 0-2 select from %vec0 and 3-5 from %vec1.
12941 %vec0 = call <3 x i64> asm "; def $0", "=s"()
12942 %vec1 = call <3 x i64> asm "; def $0", "=s"()
12943 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 3>
12944 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64> using
; mask <5, 5, 2, 3>, i.e. { %vec1[2], %vec1[2], %vec0[2], %vec1[0] }.
; Both inputs are read, so the expected output contains two "; def" blocks.
; One def is expected at s[8:13] and no move into s12/s13 follows it.
; The shuffled value is handed to inline asm constrained to s[8:15].
12948 define void @s_shuffle_v4i64_v3i64__5_5_2_3() {
12949 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_3:
12951 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12952 ; GFX900-NEXT: ;;#ASMSTART
12953 ; GFX900-NEXT: ; def s[8:13]
12954 ; GFX900-NEXT: ;;#ASMEND
12955 ; GFX900-NEXT: ;;#ASMSTART
12956 ; GFX900-NEXT: ; def s[16:21]
12957 ; GFX900-NEXT: ;;#ASMEND
12958 ; GFX900-NEXT: s_mov_b32 s8, s20
12959 ; GFX900-NEXT: s_mov_b32 s9, s21
12960 ; GFX900-NEXT: s_mov_b32 s10, s20
12961 ; GFX900-NEXT: s_mov_b32 s11, s21
12962 ; GFX900-NEXT: s_mov_b32 s14, s16
12963 ; GFX900-NEXT: s_mov_b32 s15, s17
12964 ; GFX900-NEXT: ;;#ASMSTART
12965 ; GFX900-NEXT: ; use s[8:15]
12966 ; GFX900-NEXT: ;;#ASMEND
12967 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12969 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_3:
12971 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12972 ; GFX90A-NEXT: ;;#ASMSTART
12973 ; GFX90A-NEXT: ; def s[8:13]
12974 ; GFX90A-NEXT: ;;#ASMEND
12975 ; GFX90A-NEXT: ;;#ASMSTART
12976 ; GFX90A-NEXT: ; def s[16:21]
12977 ; GFX90A-NEXT: ;;#ASMEND
12978 ; GFX90A-NEXT: s_mov_b32 s8, s20
12979 ; GFX90A-NEXT: s_mov_b32 s9, s21
12980 ; GFX90A-NEXT: s_mov_b32 s10, s20
12981 ; GFX90A-NEXT: s_mov_b32 s11, s21
12982 ; GFX90A-NEXT: s_mov_b32 s14, s16
12983 ; GFX90A-NEXT: s_mov_b32 s15, s17
12984 ; GFX90A-NEXT: ;;#ASMSTART
12985 ; GFX90A-NEXT: ; use s[8:15]
12986 ; GFX90A-NEXT: ;;#ASMEND
12987 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12989 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_3:
12991 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12992 ; GFX940-NEXT: ;;#ASMSTART
12993 ; GFX940-NEXT: ; def s[8:13]
12994 ; GFX940-NEXT: ;;#ASMEND
12995 ; GFX940-NEXT: ;;#ASMSTART
12996 ; GFX940-NEXT: ; def s[0:5]
12997 ; GFX940-NEXT: ;;#ASMEND
12998 ; GFX940-NEXT: s_mov_b32 s8, s4
12999 ; GFX940-NEXT: s_mov_b32 s9, s5
13000 ; GFX940-NEXT: s_mov_b32 s10, s4
13001 ; GFX940-NEXT: s_mov_b32 s11, s5
13002 ; GFX940-NEXT: s_mov_b32 s14, s0
13003 ; GFX940-NEXT: s_mov_b32 s15, s1
13004 ; GFX940-NEXT: ;;#ASMSTART
13005 ; GFX940-NEXT: ; use s[8:15]
13006 ; GFX940-NEXT: ;;#ASMEND
13007 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: mask indices 0-2 select from %vec0 and 3-5 from %vec1.
13008 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13009 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13010 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
13011 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64> using
; mask <5, 5, 4, 3>, i.e. { %vec1[2], %vec1[2], %vec1[1], %vec1[0] }.
; No lane reads %vec0, and the expected output contains a single "; def".
; The shuffled value is handed to inline asm constrained to s[8:15].
13015 define void @s_shuffle_v4i64_v3i64__5_5_4_3() {
13016 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_3:
13018 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13019 ; GFX900-NEXT: ;;#ASMSTART
13020 ; GFX900-NEXT: ; def s[16:21]
13021 ; GFX900-NEXT: ;;#ASMEND
13022 ; GFX900-NEXT: s_mov_b32 s8, s20
13023 ; GFX900-NEXT: s_mov_b32 s9, s21
13024 ; GFX900-NEXT: s_mov_b32 s10, s20
13025 ; GFX900-NEXT: s_mov_b32 s11, s21
13026 ; GFX900-NEXT: s_mov_b32 s12, s18
13027 ; GFX900-NEXT: s_mov_b32 s13, s19
13028 ; GFX900-NEXT: s_mov_b32 s14, s16
13029 ; GFX900-NEXT: s_mov_b32 s15, s17
13030 ; GFX900-NEXT: ;;#ASMSTART
13031 ; GFX900-NEXT: ; use s[8:15]
13032 ; GFX900-NEXT: ;;#ASMEND
13033 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13035 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_3:
13037 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13038 ; GFX90A-NEXT: ;;#ASMSTART
13039 ; GFX90A-NEXT: ; def s[16:21]
13040 ; GFX90A-NEXT: ;;#ASMEND
13041 ; GFX90A-NEXT: s_mov_b32 s8, s20
13042 ; GFX90A-NEXT: s_mov_b32 s9, s21
13043 ; GFX90A-NEXT: s_mov_b32 s10, s20
13044 ; GFX90A-NEXT: s_mov_b32 s11, s21
13045 ; GFX90A-NEXT: s_mov_b32 s12, s18
13046 ; GFX90A-NEXT: s_mov_b32 s13, s19
13047 ; GFX90A-NEXT: s_mov_b32 s14, s16
13048 ; GFX90A-NEXT: s_mov_b32 s15, s17
13049 ; GFX90A-NEXT: ;;#ASMSTART
13050 ; GFX90A-NEXT: ; use s[8:15]
13051 ; GFX90A-NEXT: ;;#ASMEND
13052 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13054 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_3:
13056 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13057 ; GFX940-NEXT: ;;#ASMSTART
13058 ; GFX940-NEXT: ; def s[0:5]
13059 ; GFX940-NEXT: ;;#ASMEND
13060 ; GFX940-NEXT: s_mov_b32 s8, s4
13061 ; GFX940-NEXT: s_mov_b32 s9, s5
13062 ; GFX940-NEXT: s_mov_b32 s10, s4
13063 ; GFX940-NEXT: s_mov_b32 s11, s5
13064 ; GFX940-NEXT: s_mov_b32 s12, s2
13065 ; GFX940-NEXT: s_mov_b32 s13, s3
13066 ; GFX940-NEXT: s_mov_b32 s14, s0
13067 ; GFX940-NEXT: s_mov_b32 s15, s1
13068 ; GFX940-NEXT: ;;#ASMSTART
13069 ; GFX940-NEXT: ; use s[8:15]
13070 ; GFX940-NEXT: ;;#ASMEND
13071 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: mask indices 0-2 select from %vec0 and 3-5 from %vec1.
13072 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13073 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13074 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 3>
13075 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64> using
; mask <poison, 4, 4, 4>, i.e. { poison, %vec1[1], %vec1[1], %vec1[1] }.
; No lane reads %vec0, and the expected output contains a single "; def".
; All three run lines share one set of expectations (the common GFX9 prefix).
; The shuffled value is handed to inline asm constrained to s[8:15].
13079 define void @s_shuffle_v4i64_v3i64__u_4_4_4() {
13080 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_4_4_4:
13082 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13083 ; GFX9-NEXT: ;;#ASMSTART
13084 ; GFX9-NEXT: ; def s[8:13]
13085 ; GFX9-NEXT: ;;#ASMEND
13086 ; GFX9-NEXT: s_mov_b32 s12, s10
13087 ; GFX9-NEXT: s_mov_b32 s13, s11
13088 ; GFX9-NEXT: s_mov_b32 s14, s10
13089 ; GFX9-NEXT: s_mov_b32 s15, s11
13090 ; GFX9-NEXT: ;;#ASMSTART
13091 ; GFX9-NEXT: ; use s[8:15]
13092 ; GFX9-NEXT: ;;#ASMEND
13093 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; IR under test: mask indices 0-2 select from %vec0 and 3-5 from %vec1.
13094 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13095 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13096 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
13097 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64> using
; mask <0, 4, 4, 4>, i.e. { %vec0[0], %vec1[1], %vec1[1], %vec1[1] }.
; Both inputs are read, so the expected output contains two "; def" blocks.
; The shuffled value is handed to inline asm constrained to s[8:15].
13101 define void @s_shuffle_v4i64_v3i64__0_4_4_4() {
13102 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_4_4_4:
13104 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13105 ; GFX900-NEXT: ;;#ASMSTART
13106 ; GFX900-NEXT: ; def s[8:13]
13107 ; GFX900-NEXT: ;;#ASMEND
13108 ; GFX900-NEXT: ;;#ASMSTART
13109 ; GFX900-NEXT: ; def s[12:17]
13110 ; GFX900-NEXT: ;;#ASMEND
13111 ; GFX900-NEXT: s_mov_b32 s10, s14
13112 ; GFX900-NEXT: s_mov_b32 s11, s15
13113 ; GFX900-NEXT: s_mov_b32 s12, s14
13114 ; GFX900-NEXT: s_mov_b32 s13, s15
13115 ; GFX900-NEXT: ;;#ASMSTART
13116 ; GFX900-NEXT: ; use s[8:15]
13117 ; GFX900-NEXT: ;;#ASMEND
13118 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13120 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_4_4_4:
13122 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13123 ; GFX90A-NEXT: ;;#ASMSTART
13124 ; GFX90A-NEXT: ; def s[8:13]
13125 ; GFX90A-NEXT: ;;#ASMEND
13126 ; GFX90A-NEXT: ;;#ASMSTART
13127 ; GFX90A-NEXT: ; def s[12:17]
13128 ; GFX90A-NEXT: ;;#ASMEND
13129 ; GFX90A-NEXT: s_mov_b32 s10, s14
13130 ; GFX90A-NEXT: s_mov_b32 s11, s15
13131 ; GFX90A-NEXT: s_mov_b32 s12, s14
13132 ; GFX90A-NEXT: s_mov_b32 s13, s15
13133 ; GFX90A-NEXT: ;;#ASMSTART
13134 ; GFX90A-NEXT: ; use s[8:15]
13135 ; GFX90A-NEXT: ;;#ASMEND
13136 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13138 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__0_4_4_4:
13140 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13141 ; GFX940-NEXT: ;;#ASMSTART
13142 ; GFX940-NEXT: ; def s[8:13]
13143 ; GFX940-NEXT: ;;#ASMEND
13144 ; GFX940-NEXT: ;;#ASMSTART
13145 ; GFX940-NEXT: ; def s[0:5]
13146 ; GFX940-NEXT: ;;#ASMEND
13147 ; GFX940-NEXT: s_mov_b32 s10, s2
13148 ; GFX940-NEXT: s_mov_b32 s11, s3
13149 ; GFX940-NEXT: s_mov_b32 s12, s2
13150 ; GFX940-NEXT: s_mov_b32 s13, s3
13151 ; GFX940-NEXT: s_mov_b32 s14, s2
13152 ; GFX940-NEXT: s_mov_b32 s15, s3
13153 ; GFX940-NEXT: ;;#ASMSTART
13154 ; GFX940-NEXT: ; use s[8:15]
13155 ; GFX940-NEXT: ;;#ASMEND
13156 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: mask indices 0-2 select from %vec0 and 3-5 from %vec1.
13157 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13158 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13159 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
13160 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64> using
; mask <1, 4, 4, 4>, i.e. { %vec0[1], %vec1[1], %vec1[1], %vec1[1] }.
; Both inputs are read, so the expected output contains two "; def" blocks.
; The shuffled value is handed to inline asm constrained to s[8:15].
13164 define void @s_shuffle_v4i64_v3i64__1_4_4_4() {
13165 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_4_4_4:
13167 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13168 ; GFX900-NEXT: ;;#ASMSTART
13169 ; GFX900-NEXT: ; def s[4:9]
13170 ; GFX900-NEXT: ;;#ASMEND
13171 ; GFX900-NEXT: ;;#ASMSTART
13172 ; GFX900-NEXT: ; def s[8:13]
13173 ; GFX900-NEXT: ;;#ASMEND
13174 ; GFX900-NEXT: s_mov_b32 s8, s6
13175 ; GFX900-NEXT: s_mov_b32 s9, s7
13176 ; GFX900-NEXT: s_mov_b32 s12, s10
13177 ; GFX900-NEXT: s_mov_b32 s13, s11
13178 ; GFX900-NEXT: s_mov_b32 s14, s10
13179 ; GFX900-NEXT: s_mov_b32 s15, s11
13180 ; GFX900-NEXT: ;;#ASMSTART
13181 ; GFX900-NEXT: ; use s[8:15]
13182 ; GFX900-NEXT: ;;#ASMEND
13183 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13185 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_4_4_4:
13187 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13188 ; GFX90A-NEXT: ;;#ASMSTART
13189 ; GFX90A-NEXT: ; def s[4:9]
13190 ; GFX90A-NEXT: ;;#ASMEND
13191 ; GFX90A-NEXT: ;;#ASMSTART
13192 ; GFX90A-NEXT: ; def s[8:13]
13193 ; GFX90A-NEXT: ;;#ASMEND
13194 ; GFX90A-NEXT: s_mov_b32 s8, s6
13195 ; GFX90A-NEXT: s_mov_b32 s9, s7
13196 ; GFX90A-NEXT: s_mov_b32 s12, s10
13197 ; GFX90A-NEXT: s_mov_b32 s13, s11
13198 ; GFX90A-NEXT: s_mov_b32 s14, s10
13199 ; GFX90A-NEXT: s_mov_b32 s15, s11
13200 ; GFX90A-NEXT: ;;#ASMSTART
13201 ; GFX90A-NEXT: ; use s[8:15]
13202 ; GFX90A-NEXT: ;;#ASMEND
13203 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13205 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_4_4_4:
13207 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13208 ; GFX940-NEXT: ;;#ASMSTART
13209 ; GFX940-NEXT: ; def s[8:13]
13210 ; GFX940-NEXT: ;;#ASMEND
13211 ; GFX940-NEXT: ;;#ASMSTART
13212 ; GFX940-NEXT: ; def s[0:5]
13213 ; GFX940-NEXT: ;;#ASMEND
13214 ; GFX940-NEXT: s_mov_b32 s8, s2
13215 ; GFX940-NEXT: s_mov_b32 s9, s3
13216 ; GFX940-NEXT: s_mov_b32 s12, s10
13217 ; GFX940-NEXT: s_mov_b32 s13, s11
13218 ; GFX940-NEXT: s_mov_b32 s14, s10
13219 ; GFX940-NEXT: s_mov_b32 s15, s11
13220 ; GFX940-NEXT: ;;#ASMSTART
13221 ; GFX940-NEXT: ; use s[8:15]
13222 ; GFX940-NEXT: ;;#ASMEND
13223 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: mask indices 0-2 select from %vec0 and 3-5 from %vec1.
13224 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13225 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13226 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
13227 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64> using
; mask <2, 4, 4, 4>, i.e. { %vec0[2], %vec1[1], %vec1[1], %vec1[1] }.
; Both inputs are read, so the expected output contains two "; def" blocks.
; The shuffled value is handed to inline asm constrained to s[8:15].
13231 define void @s_shuffle_v4i64_v3i64__2_4_4_4() {
13232 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_4_4_4:
13234 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13235 ; GFX900-NEXT: ;;#ASMSTART
13236 ; GFX900-NEXT: ; def s[12:17]
13237 ; GFX900-NEXT: ;;#ASMEND
13238 ; GFX900-NEXT: ;;#ASMSTART
13239 ; GFX900-NEXT: ; def s[8:13]
13240 ; GFX900-NEXT: ;;#ASMEND
13241 ; GFX900-NEXT: s_mov_b32 s8, s16
13242 ; GFX900-NEXT: s_mov_b32 s9, s17
13243 ; GFX900-NEXT: s_mov_b32 s12, s10
13244 ; GFX900-NEXT: s_mov_b32 s13, s11
13245 ; GFX900-NEXT: s_mov_b32 s14, s10
13246 ; GFX900-NEXT: s_mov_b32 s15, s11
13247 ; GFX900-NEXT: ;;#ASMSTART
13248 ; GFX900-NEXT: ; use s[8:15]
13249 ; GFX900-NEXT: ;;#ASMEND
13250 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13252 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_4_4_4:
13254 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13255 ; GFX90A-NEXT: ;;#ASMSTART
13256 ; GFX90A-NEXT: ; def s[12:17]
13257 ; GFX90A-NEXT: ;;#ASMEND
13258 ; GFX90A-NEXT: ;;#ASMSTART
13259 ; GFX90A-NEXT: ; def s[8:13]
13260 ; GFX90A-NEXT: ;;#ASMEND
13261 ; GFX90A-NEXT: s_mov_b32 s8, s16
13262 ; GFX90A-NEXT: s_mov_b32 s9, s17
13263 ; GFX90A-NEXT: s_mov_b32 s12, s10
13264 ; GFX90A-NEXT: s_mov_b32 s13, s11
13265 ; GFX90A-NEXT: s_mov_b32 s14, s10
13266 ; GFX90A-NEXT: s_mov_b32 s15, s11
13267 ; GFX90A-NEXT: ;;#ASMSTART
13268 ; GFX90A-NEXT: ; use s[8:15]
13269 ; GFX90A-NEXT: ;;#ASMEND
13270 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13272 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_4_4_4:
13274 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13275 ; GFX940-NEXT: ;;#ASMSTART
13276 ; GFX940-NEXT: ; def s[8:13]
13277 ; GFX940-NEXT: ;;#ASMEND
13278 ; GFX940-NEXT: ;;#ASMSTART
13279 ; GFX940-NEXT: ; def s[0:5]
13280 ; GFX940-NEXT: ;;#ASMEND
13281 ; GFX940-NEXT: s_mov_b32 s8, s4
13282 ; GFX940-NEXT: s_mov_b32 s9, s5
13283 ; GFX940-NEXT: s_mov_b32 s12, s10
13284 ; GFX940-NEXT: s_mov_b32 s13, s11
13285 ; GFX940-NEXT: s_mov_b32 s14, s10
13286 ; GFX940-NEXT: s_mov_b32 s15, s11
13287 ; GFX940-NEXT: ;;#ASMSTART
13288 ; GFX940-NEXT: ; use s[8:15]
13289 ; GFX940-NEXT: ;;#ASMEND
13290 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: mask indices 0-2 select from %vec0 and 3-5 from %vec1.
13291 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13292 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13293 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
13294 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64> using
; mask <3, 4, 4, 4>, i.e. { %vec1[0], %vec1[1], %vec1[1], %vec1[1] }.
; No lane reads %vec0, and the expected output contains a single "; def".
; All three run lines share one set of expectations (the common GFX9 prefix).
; The shuffled value is handed to inline asm constrained to s[8:15].
13298 define void @s_shuffle_v4i64_v3i64__3_4_4_4() {
13299 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_4_4_4:
13301 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13302 ; GFX9-NEXT: ;;#ASMSTART
13303 ; GFX9-NEXT: ; def s[8:13]
13304 ; GFX9-NEXT: ;;#ASMEND
13305 ; GFX9-NEXT: s_mov_b32 s12, s10
13306 ; GFX9-NEXT: s_mov_b32 s13, s11
13307 ; GFX9-NEXT: s_mov_b32 s14, s10
13308 ; GFX9-NEXT: s_mov_b32 s15, s11
13309 ; GFX9-NEXT: ;;#ASMSTART
13310 ; GFX9-NEXT: ; use s[8:15]
13311 ; GFX9-NEXT: ;;#ASMEND
13312 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; IR under test: mask indices 0-2 select from %vec0 and 3-5 from %vec1.
13313 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13314 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13315 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
13316 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64> using
; mask <4, 4, 4, 4>, i.e. %vec1[1] replicated into all four lanes.
; No lane reads %vec0, and the expected output contains a single "; def".
; All three run lines share one set of expectations (the common GFX9 prefix).
; The shuffled value is handed to inline asm constrained to s[8:15].
13320 define void @s_shuffle_v4i64_v3i64__4_4_4_4() {
13321 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_4_4_4:
13323 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13324 ; GFX9-NEXT: ;;#ASMSTART
13325 ; GFX9-NEXT: ; def s[8:13]
13326 ; GFX9-NEXT: ;;#ASMEND
13327 ; GFX9-NEXT: s_mov_b32 s8, s10
13328 ; GFX9-NEXT: s_mov_b32 s9, s11
13329 ; GFX9-NEXT: s_mov_b32 s12, s10
13330 ; GFX9-NEXT: s_mov_b32 s13, s11
13331 ; GFX9-NEXT: s_mov_b32 s14, s10
13332 ; GFX9-NEXT: s_mov_b32 s15, s11
13333 ; GFX9-NEXT: ;;#ASMSTART
13334 ; GFX9-NEXT: ; use s[8:15]
13335 ; GFX9-NEXT: ;;#ASMEND
13336 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; IR under test: mask indices 0-2 select from %vec0 and 3-5 from %vec1.
13337 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13338 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13339 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
13340 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64> using
; mask <5, 4, 4, 4>, i.e. { %vec1[2], %vec1[1], %vec1[1], %vec1[1] }.
; No lane reads %vec0, and the expected output contains a single "; def".
; All three run lines share one set of expectations (the common GFX9 prefix).
; The shuffled value is handed to inline asm constrained to s[8:15].
13344 define void @s_shuffle_v4i64_v3i64__5_4_4_4() {
13345 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_4_4:
13347 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13348 ; GFX9-NEXT: ;;#ASMSTART
13349 ; GFX9-NEXT: ; def s[8:13]
13350 ; GFX9-NEXT: ;;#ASMEND
13351 ; GFX9-NEXT: s_mov_b32 s8, s12
13352 ; GFX9-NEXT: s_mov_b32 s9, s13
13353 ; GFX9-NEXT: s_mov_b32 s12, s10
13354 ; GFX9-NEXT: s_mov_b32 s13, s11
13355 ; GFX9-NEXT: s_mov_b32 s14, s10
13356 ; GFX9-NEXT: s_mov_b32 s15, s11
13357 ; GFX9-NEXT: ;;#ASMSTART
13358 ; GFX9-NEXT: ; use s[8:15]
13359 ; GFX9-NEXT: ;;#ASMEND
13360 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; IR under test: mask indices 0-2 select from %vec0 and 3-5 from %vec1.
13361 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13362 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13363 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
13364 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64> using
; mask <5, poison, 4, 4>, i.e. { %vec1[2], poison, %vec1[1], %vec1[1] }.
; No lane reads %vec0, and the expected output contains a single "; def".
; No move into s10/s11 is expected for the poison lane.
; The shuffled value is handed to inline asm constrained to s[8:15].
13368 define void @s_shuffle_v4i64_v3i64__5_u_4_4() {
13369 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4:
13371 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13372 ; GFX900-NEXT: ;;#ASMSTART
13373 ; GFX900-NEXT: ; def s[4:9]
13374 ; GFX900-NEXT: ;;#ASMEND
13375 ; GFX900-NEXT: s_mov_b32 s12, s6
13376 ; GFX900-NEXT: s_mov_b32 s13, s7
13377 ; GFX900-NEXT: s_mov_b32 s14, s6
13378 ; GFX900-NEXT: s_mov_b32 s15, s7
13379 ; GFX900-NEXT: ;;#ASMSTART
13380 ; GFX900-NEXT: ; use s[8:15]
13381 ; GFX900-NEXT: ;;#ASMEND
13382 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13384 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4:
13386 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13387 ; GFX90A-NEXT: ;;#ASMSTART
13388 ; GFX90A-NEXT: ; def s[4:9]
13389 ; GFX90A-NEXT: ;;#ASMEND
13390 ; GFX90A-NEXT: s_mov_b32 s12, s6
13391 ; GFX90A-NEXT: s_mov_b32 s13, s7
13392 ; GFX90A-NEXT: s_mov_b32 s14, s6
13393 ; GFX90A-NEXT: s_mov_b32 s15, s7
13394 ; GFX90A-NEXT: ;;#ASMSTART
13395 ; GFX90A-NEXT: ; use s[8:15]
13396 ; GFX90A-NEXT: ;;#ASMEND
13397 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13399 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4:
13401 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13402 ; GFX940-NEXT: ;;#ASMSTART
13403 ; GFX940-NEXT: ; def s[0:5]
13404 ; GFX940-NEXT: ;;#ASMEND
13405 ; GFX940-NEXT: s_mov_b32 s8, s4
13406 ; GFX940-NEXT: s_mov_b32 s9, s5
13407 ; GFX940-NEXT: s_mov_b32 s12, s2
13408 ; GFX940-NEXT: s_mov_b32 s13, s3
13409 ; GFX940-NEXT: s_mov_b32 s14, s2
13410 ; GFX940-NEXT: s_mov_b32 s15, s3
13411 ; GFX940-NEXT: ;;#ASMSTART
13412 ; GFX940-NEXT: ; use s[8:15]
13413 ; GFX940-NEXT: ;;#ASMEND
13414 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: mask indices 0-2 select from %vec0 and 3-5 from %vec1.
13415 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13416 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13417 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 4, i32 4>
13418 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64> using
; mask <5, 0, 4, 4>, i.e. { %vec1[2], %vec0[0], %vec1[1], %vec1[1] }.
; Both inputs are read, so the expected output contains two "; def" blocks.
; In the GFX940 expectations a move is placed between the two defs.
; The shuffled value is handed to inline asm constrained to s[8:15].
13422 define void @s_shuffle_v4i64_v3i64__5_0_4_4() {
13423 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_4_4:
13425 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13426 ; GFX900-NEXT: ;;#ASMSTART
13427 ; GFX900-NEXT: ; def s[4:9]
13428 ; GFX900-NEXT: ;;#ASMEND
13429 ; GFX900-NEXT: ;;#ASMSTART
13430 ; GFX900-NEXT: ; def s[12:17]
13431 ; GFX900-NEXT: ;;#ASMEND
13432 ; GFX900-NEXT: s_mov_b32 s8, s16
13433 ; GFX900-NEXT: s_mov_b32 s9, s17
13434 ; GFX900-NEXT: s_mov_b32 s10, s4
13435 ; GFX900-NEXT: s_mov_b32 s11, s5
13436 ; GFX900-NEXT: s_mov_b32 s12, s14
13437 ; GFX900-NEXT: s_mov_b32 s13, s15
13438 ; GFX900-NEXT: ;;#ASMSTART
13439 ; GFX900-NEXT: ; use s[8:15]
13440 ; GFX900-NEXT: ;;#ASMEND
13441 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13443 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_4_4:
13445 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13446 ; GFX90A-NEXT: ;;#ASMSTART
13447 ; GFX90A-NEXT: ; def s[4:9]
13448 ; GFX90A-NEXT: ;;#ASMEND
13449 ; GFX90A-NEXT: ;;#ASMSTART
13450 ; GFX90A-NEXT: ; def s[12:17]
13451 ; GFX90A-NEXT: ;;#ASMEND
13452 ; GFX90A-NEXT: s_mov_b32 s8, s16
13453 ; GFX90A-NEXT: s_mov_b32 s9, s17
13454 ; GFX90A-NEXT: s_mov_b32 s10, s4
13455 ; GFX90A-NEXT: s_mov_b32 s11, s5
13456 ; GFX90A-NEXT: s_mov_b32 s12, s14
13457 ; GFX90A-NEXT: s_mov_b32 s13, s15
13458 ; GFX90A-NEXT: ;;#ASMSTART
13459 ; GFX90A-NEXT: ; use s[8:15]
13460 ; GFX90A-NEXT: ;;#ASMEND
13461 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13463 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_4_4:
13465 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13466 ; GFX940-NEXT: ;;#ASMSTART
13467 ; GFX940-NEXT: ; def s[0:5]
13468 ; GFX940-NEXT: ;;#ASMEND
13469 ; GFX940-NEXT: s_mov_b32 s10, s0
13470 ; GFX940-NEXT: ;;#ASMSTART
13471 ; GFX940-NEXT: ; def s[4:9]
13472 ; GFX940-NEXT: ;;#ASMEND
13473 ; GFX940-NEXT: s_mov_b32 s11, s1
13474 ; GFX940-NEXT: s_mov_b32 s12, s6
13475 ; GFX940-NEXT: s_mov_b32 s13, s7
13476 ; GFX940-NEXT: s_mov_b32 s14, s6
13477 ; GFX940-NEXT: s_mov_b32 s15, s7
13478 ; GFX940-NEXT: ;;#ASMSTART
13479 ; GFX940-NEXT: ; use s[8:15]
13480 ; GFX940-NEXT: ;;#ASMEND
13481 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: mask indices 0-2 select from %vec0 and 3-5 from %vec1.
13482 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13483 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13484 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 4, i32 4>
13485 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64> using
; mask <5, 1, 4, 4>, i.e. { %vec1[2], %vec0[1], %vec1[1], %vec1[1] }.
; Both inputs are read, so the expected output contains two "; def" blocks.
; One def is expected at s[8:13] and no move into s10/s11 follows it.
; The shuffled value is handed to inline asm constrained to s[8:15].
13489 define void @s_shuffle_v4i64_v3i64__5_1_4_4() {
13490 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_4_4:
13492 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13493 ; GFX900-NEXT: ;;#ASMSTART
13494 ; GFX900-NEXT: ; def s[8:13]
13495 ; GFX900-NEXT: ;;#ASMEND
13496 ; GFX900-NEXT: ;;#ASMSTART
13497 ; GFX900-NEXT: ; def s[4:9]
13498 ; GFX900-NEXT: ;;#ASMEND
13499 ; GFX900-NEXT: s_mov_b32 s12, s6
13500 ; GFX900-NEXT: s_mov_b32 s13, s7
13501 ; GFX900-NEXT: s_mov_b32 s14, s6
13502 ; GFX900-NEXT: s_mov_b32 s15, s7
13503 ; GFX900-NEXT: ;;#ASMSTART
13504 ; GFX900-NEXT: ; use s[8:15]
13505 ; GFX900-NEXT: ;;#ASMEND
13506 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13508 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_4_4:
13510 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13511 ; GFX90A-NEXT: ;;#ASMSTART
13512 ; GFX90A-NEXT: ; def s[8:13]
13513 ; GFX90A-NEXT: ;;#ASMEND
13514 ; GFX90A-NEXT: ;;#ASMSTART
13515 ; GFX90A-NEXT: ; def s[4:9]
13516 ; GFX90A-NEXT: ;;#ASMEND
13517 ; GFX90A-NEXT: s_mov_b32 s12, s6
13518 ; GFX90A-NEXT: s_mov_b32 s13, s7
13519 ; GFX90A-NEXT: s_mov_b32 s14, s6
13520 ; GFX90A-NEXT: s_mov_b32 s15, s7
13521 ; GFX90A-NEXT: ;;#ASMSTART
13522 ; GFX90A-NEXT: ; use s[8:15]
13523 ; GFX90A-NEXT: ;;#ASMEND
13524 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13526 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_4_4:
13528 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13529 ; GFX940-NEXT: ;;#ASMSTART
13530 ; GFX940-NEXT: ; def s[8:13]
13531 ; GFX940-NEXT: ;;#ASMEND
13532 ; GFX940-NEXT: ;;#ASMSTART
13533 ; GFX940-NEXT: ; def s[0:5]
13534 ; GFX940-NEXT: ;;#ASMEND
13535 ; GFX940-NEXT: s_mov_b32 s8, s4
13536 ; GFX940-NEXT: s_mov_b32 s9, s5
13537 ; GFX940-NEXT: s_mov_b32 s12, s2
13538 ; GFX940-NEXT: s_mov_b32 s13, s3
13539 ; GFX940-NEXT: s_mov_b32 s14, s2
13540 ; GFX940-NEXT: s_mov_b32 s15, s3
13541 ; GFX940-NEXT: ;;#ASMSTART
13542 ; GFX940-NEXT: ; use s[8:15]
13543 ; GFX940-NEXT: ;;#ASMEND
13544 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: mask indices 0-2 select from %vec0 and 3-5 from %vec1.
13545 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13546 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13547 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 4, i32 4>
13548 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64>.
; Mask <5, 2, 4, 4>: mask indices 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec1[2], %vec0[2], %vec1[1], %vec1[1] }.
; The "{s[8:15]}" constraint pins the shuffled value to s[8:15] for the use.
; The assertions below are autogenerated (see the NOTE at the top of the file).
13552 define void @s_shuffle_v4i64_v3i64__5_2_4_4() {
13553 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_4_4:
13555 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13556 ; GFX900-NEXT: ;;#ASMSTART
13557 ; GFX900-NEXT: ; def s[8:13]
13558 ; GFX900-NEXT: ;;#ASMEND
13559 ; GFX900-NEXT: ;;#ASMSTART
13560 ; GFX900-NEXT: ; def s[4:9]
13561 ; GFX900-NEXT: ;;#ASMEND
13562 ; GFX900-NEXT: s_mov_b32 s10, s12
13563 ; GFX900-NEXT: s_mov_b32 s11, s13
13564 ; GFX900-NEXT: s_mov_b32 s12, s6
13565 ; GFX900-NEXT: s_mov_b32 s13, s7
13566 ; GFX900-NEXT: s_mov_b32 s14, s6
13567 ; GFX900-NEXT: s_mov_b32 s15, s7
13568 ; GFX900-NEXT: ;;#ASMSTART
13569 ; GFX900-NEXT: ; use s[8:15]
13570 ; GFX900-NEXT: ;;#ASMEND
13571 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13573 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_4_4:
13575 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13576 ; GFX90A-NEXT: ;;#ASMSTART
13577 ; GFX90A-NEXT: ; def s[8:13]
13578 ; GFX90A-NEXT: ;;#ASMEND
13579 ; GFX90A-NEXT: ;;#ASMSTART
13580 ; GFX90A-NEXT: ; def s[4:9]
13581 ; GFX90A-NEXT: ;;#ASMEND
13582 ; GFX90A-NEXT: s_mov_b32 s10, s12
13583 ; GFX90A-NEXT: s_mov_b32 s11, s13
13584 ; GFX90A-NEXT: s_mov_b32 s12, s6
13585 ; GFX90A-NEXT: s_mov_b32 s13, s7
13586 ; GFX90A-NEXT: s_mov_b32 s14, s6
13587 ; GFX90A-NEXT: s_mov_b32 s15, s7
13588 ; GFX90A-NEXT: ;;#ASMSTART
13589 ; GFX90A-NEXT: ; use s[8:15]
13590 ; GFX90A-NEXT: ;;#ASMEND
13591 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13593 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_4_4:
13595 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13596 ; GFX940-NEXT: ;;#ASMSTART
13597 ; GFX940-NEXT: ; def s[12:17]
13598 ; GFX940-NEXT: ;;#ASMEND
13599 ; GFX940-NEXT: ;;#ASMSTART
13600 ; GFX940-NEXT: ; def s[0:5]
13601 ; GFX940-NEXT: ;;#ASMEND
13602 ; GFX940-NEXT: s_mov_b32 s8, s16
13603 ; GFX940-NEXT: s_mov_b32 s9, s17
13604 ; GFX940-NEXT: s_mov_b32 s10, s4
13605 ; GFX940-NEXT: s_mov_b32 s11, s5
13606 ; GFX940-NEXT: s_mov_b32 s12, s14
13607 ; GFX940-NEXT: s_mov_b32 s13, s15
13608 ; GFX940-NEXT: ;;#ASMSTART
13609 ; GFX940-NEXT: ; use s[8:15]
13610 ; GFX940-NEXT: ;;#ASMEND
13611 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13612 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13613 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13614 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 4, i32 4>
13615 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64>.
; Mask <5, 3, 4, 4>: every index is in the 3-5 range, so only %vec1 is read;
; the result is { %vec1[2], %vec1[0], %vec1[1], %vec1[1] }.
; The expected output accordingly contains a single "def" asm block.
; The "{s[8:15]}" constraint pins the shuffled value to s[8:15] for the use.
13619 define void @s_shuffle_v4i64_v3i64__5_3_4_4() {
13620 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_4_4:
13622 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13623 ; GFX900-NEXT: ;;#ASMSTART
13624 ; GFX900-NEXT: ; def s[4:9]
13625 ; GFX900-NEXT: ;;#ASMEND
13626 ; GFX900-NEXT: s_mov_b32 s10, s4
13627 ; GFX900-NEXT: s_mov_b32 s11, s5
13628 ; GFX900-NEXT: s_mov_b32 s12, s6
13629 ; GFX900-NEXT: s_mov_b32 s13, s7
13630 ; GFX900-NEXT: s_mov_b32 s14, s6
13631 ; GFX900-NEXT: s_mov_b32 s15, s7
13632 ; GFX900-NEXT: ;;#ASMSTART
13633 ; GFX900-NEXT: ; use s[8:15]
13634 ; GFX900-NEXT: ;;#ASMEND
13635 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13637 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_4_4:
13639 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13640 ; GFX90A-NEXT: ;;#ASMSTART
13641 ; GFX90A-NEXT: ; def s[4:9]
13642 ; GFX90A-NEXT: ;;#ASMEND
13643 ; GFX90A-NEXT: s_mov_b32 s10, s4
13644 ; GFX90A-NEXT: s_mov_b32 s11, s5
13645 ; GFX90A-NEXT: s_mov_b32 s12, s6
13646 ; GFX90A-NEXT: s_mov_b32 s13, s7
13647 ; GFX90A-NEXT: s_mov_b32 s14, s6
13648 ; GFX90A-NEXT: s_mov_b32 s15, s7
13649 ; GFX90A-NEXT: ;;#ASMSTART
13650 ; GFX90A-NEXT: ; use s[8:15]
13651 ; GFX90A-NEXT: ;;#ASMEND
13652 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13654 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_4_4:
13656 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13657 ; GFX940-NEXT: ;;#ASMSTART
13658 ; GFX940-NEXT: ; def s[0:5]
13659 ; GFX940-NEXT: ;;#ASMEND
13660 ; GFX940-NEXT: s_mov_b32 s8, s4
13661 ; GFX940-NEXT: s_mov_b32 s9, s5
13662 ; GFX940-NEXT: s_mov_b32 s10, s0
13663 ; GFX940-NEXT: s_mov_b32 s11, s1
13664 ; GFX940-NEXT: s_mov_b32 s12, s2
13665 ; GFX940-NEXT: s_mov_b32 s13, s3
13666 ; GFX940-NEXT: s_mov_b32 s14, s2
13667 ; GFX940-NEXT: s_mov_b32 s15, s3
13668 ; GFX940-NEXT: ;;#ASMSTART
13669 ; GFX940-NEXT: ; use s[8:15]
13670 ; GFX940-NEXT: ;;#ASMEND
13671 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13672 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13673 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13674 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 4, i32 4>
13675 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64>.
; Mask <5, 5, 4, 4>: every index is in the 3-5 range, so only %vec1 is read;
; the result is { %vec1[2], %vec1[2], %vec1[1], %vec1[1] }.
; The expected output accordingly contains a single "def" asm block.
; The "{s[8:15]}" constraint pins the shuffled value to s[8:15] for the use.
13679 define void @s_shuffle_v4i64_v3i64__5_5_4_4() {
13680 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_4:
13682 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13683 ; GFX900-NEXT: ;;#ASMSTART
13684 ; GFX900-NEXT: ; def s[12:17]
13685 ; GFX900-NEXT: ;;#ASMEND
13686 ; GFX900-NEXT: s_mov_b32 s8, s16
13687 ; GFX900-NEXT: s_mov_b32 s9, s17
13688 ; GFX900-NEXT: s_mov_b32 s10, s16
13689 ; GFX900-NEXT: s_mov_b32 s11, s17
13690 ; GFX900-NEXT: s_mov_b32 s12, s14
13691 ; GFX900-NEXT: s_mov_b32 s13, s15
13692 ; GFX900-NEXT: ;;#ASMSTART
13693 ; GFX900-NEXT: ; use s[8:15]
13694 ; GFX900-NEXT: ;;#ASMEND
13695 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13697 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_4:
13699 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13700 ; GFX90A-NEXT: ;;#ASMSTART
13701 ; GFX90A-NEXT: ; def s[12:17]
13702 ; GFX90A-NEXT: ;;#ASMEND
13703 ; GFX90A-NEXT: s_mov_b32 s8, s16
13704 ; GFX90A-NEXT: s_mov_b32 s9, s17
13705 ; GFX90A-NEXT: s_mov_b32 s10, s16
13706 ; GFX90A-NEXT: s_mov_b32 s11, s17
13707 ; GFX90A-NEXT: s_mov_b32 s12, s14
13708 ; GFX90A-NEXT: s_mov_b32 s13, s15
13709 ; GFX90A-NEXT: ;;#ASMSTART
13710 ; GFX90A-NEXT: ; use s[8:15]
13711 ; GFX90A-NEXT: ;;#ASMEND
13712 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13714 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_4:
13716 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13717 ; GFX940-NEXT: ;;#ASMSTART
13718 ; GFX940-NEXT: ; def s[0:5]
13719 ; GFX940-NEXT: ;;#ASMEND
13720 ; GFX940-NEXT: s_mov_b32 s8, s4
13721 ; GFX940-NEXT: s_mov_b32 s9, s5
13722 ; GFX940-NEXT: s_mov_b32 s10, s4
13723 ; GFX940-NEXT: s_mov_b32 s11, s5
13724 ; GFX940-NEXT: s_mov_b32 s12, s2
13725 ; GFX940-NEXT: s_mov_b32 s13, s3
13726 ; GFX940-NEXT: s_mov_b32 s14, s2
13727 ; GFX940-NEXT: s_mov_b32 s15, s3
13728 ; GFX940-NEXT: ;;#ASMSTART
13729 ; GFX940-NEXT: ; use s[8:15]
13730 ; GFX940-NEXT: ;;#ASMEND
13731 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13732 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13733 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13734 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 4>
13735 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64>.
; Mask <5, 5, poison, 4>: the defined indices are all in the 3-5 range, so only
; %vec1 is read; the result is { %vec1[2], %vec1[2], <poison>, %vec1[1] }.
; Result element 2 is unconstrained because its mask entry is poison.
; The "{s[8:15]}" constraint pins the shuffled value to s[8:15] for the use.
13739 define void @s_shuffle_v4i64_v3i64__5_5_u_4() {
13740 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4:
13742 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13743 ; GFX900-NEXT: ;;#ASMSTART
13744 ; GFX900-NEXT: ; def s[12:17]
13745 ; GFX900-NEXT: ;;#ASMEND
13746 ; GFX900-NEXT: s_mov_b32 s8, s16
13747 ; GFX900-NEXT: s_mov_b32 s9, s17
13748 ; GFX900-NEXT: s_mov_b32 s10, s16
13749 ; GFX900-NEXT: s_mov_b32 s11, s17
13750 ; GFX900-NEXT: ;;#ASMSTART
13751 ; GFX900-NEXT: ; use s[8:15]
13752 ; GFX900-NEXT: ;;#ASMEND
13753 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13755 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4:
13757 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13758 ; GFX90A-NEXT: ;;#ASMSTART
13759 ; GFX90A-NEXT: ; def s[12:17]
13760 ; GFX90A-NEXT: ;;#ASMEND
13761 ; GFX90A-NEXT: s_mov_b32 s8, s16
13762 ; GFX90A-NEXT: s_mov_b32 s9, s17
13763 ; GFX90A-NEXT: s_mov_b32 s10, s16
13764 ; GFX90A-NEXT: s_mov_b32 s11, s17
13765 ; GFX90A-NEXT: ;;#ASMSTART
13766 ; GFX90A-NEXT: ; use s[8:15]
13767 ; GFX90A-NEXT: ;;#ASMEND
13768 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13770 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4:
13772 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13773 ; GFX940-NEXT: ;;#ASMSTART
13774 ; GFX940-NEXT: ; def s[0:5]
13775 ; GFX940-NEXT: ;;#ASMEND
13776 ; GFX940-NEXT: s_mov_b32 s8, s4
13777 ; GFX940-NEXT: s_mov_b32 s9, s5
13778 ; GFX940-NEXT: s_mov_b32 s10, s4
13779 ; GFX940-NEXT: s_mov_b32 s11, s5
13780 ; GFX940-NEXT: s_mov_b32 s14, s2
13781 ; GFX940-NEXT: s_mov_b32 s15, s3
13782 ; GFX940-NEXT: ;;#ASMSTART
13783 ; GFX940-NEXT: ; use s[8:15]
13784 ; GFX940-NEXT: ;;#ASMEND
13785 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13786 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13787 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13788 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 4>
13789 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64>.
; Mask <5, 5, 0, 4>: mask indices 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec1[2], %vec1[2], %vec0[0], %vec1[1] }.
; The "{s[8:15]}" constraint pins the shuffled value to s[8:15] for the use.
; The assertions below are autogenerated (see the NOTE at the top of the file).
13793 define void @s_shuffle_v4i64_v3i64__5_5_0_4() {
13794 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_4:
13796 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13797 ; GFX900-NEXT: ;;#ASMSTART
13798 ; GFX900-NEXT: ; def s[4:9]
13799 ; GFX900-NEXT: ;;#ASMEND
13800 ; GFX900-NEXT: ;;#ASMSTART
13801 ; GFX900-NEXT: ; def s[12:17]
13802 ; GFX900-NEXT: ;;#ASMEND
13803 ; GFX900-NEXT: s_mov_b32 s8, s16
13804 ; GFX900-NEXT: s_mov_b32 s9, s17
13805 ; GFX900-NEXT: s_mov_b32 s10, s16
13806 ; GFX900-NEXT: s_mov_b32 s11, s17
13807 ; GFX900-NEXT: s_mov_b32 s12, s4
13808 ; GFX900-NEXT: s_mov_b32 s13, s5
13809 ; GFX900-NEXT: ;;#ASMSTART
13810 ; GFX900-NEXT: ; use s[8:15]
13811 ; GFX900-NEXT: ;;#ASMEND
13812 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13814 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_4:
13816 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13817 ; GFX90A-NEXT: ;;#ASMSTART
13818 ; GFX90A-NEXT: ; def s[4:9]
13819 ; GFX90A-NEXT: ;;#ASMEND
13820 ; GFX90A-NEXT: ;;#ASMSTART
13821 ; GFX90A-NEXT: ; def s[12:17]
13822 ; GFX90A-NEXT: ;;#ASMEND
13823 ; GFX90A-NEXT: s_mov_b32 s8, s16
13824 ; GFX90A-NEXT: s_mov_b32 s9, s17
13825 ; GFX90A-NEXT: s_mov_b32 s10, s16
13826 ; GFX90A-NEXT: s_mov_b32 s11, s17
13827 ; GFX90A-NEXT: s_mov_b32 s12, s4
13828 ; GFX90A-NEXT: s_mov_b32 s13, s5
13829 ; GFX90A-NEXT: ;;#ASMSTART
13830 ; GFX90A-NEXT: ; use s[8:15]
13831 ; GFX90A-NEXT: ;;#ASMEND
13832 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13834 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_4:
13836 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13837 ; GFX940-NEXT: ;;#ASMSTART
13838 ; GFX940-NEXT: ; def s[12:17]
13839 ; GFX940-NEXT: ;;#ASMEND
13840 ; GFX940-NEXT: ;;#ASMSTART
13841 ; GFX940-NEXT: ; def s[0:5]
13842 ; GFX940-NEXT: ;;#ASMEND
13843 ; GFX940-NEXT: s_mov_b32 s8, s16
13844 ; GFX940-NEXT: s_mov_b32 s9, s17
13845 ; GFX940-NEXT: s_mov_b32 s10, s16
13846 ; GFX940-NEXT: s_mov_b32 s11, s17
13847 ; GFX940-NEXT: s_mov_b32 s12, s0
13848 ; GFX940-NEXT: s_mov_b32 s13, s1
13849 ; GFX940-NEXT: ;;#ASMSTART
13850 ; GFX940-NEXT: ; use s[8:15]
13851 ; GFX940-NEXT: ;;#ASMEND
13852 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13853 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13854 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13855 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 4>
13856 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64>.
; Mask <5, 5, 1, 4>: mask indices 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec1[2], %vec1[2], %vec0[1], %vec1[1] }.
; The "{s[8:15]}" constraint pins the shuffled value to s[8:15] for the use.
; The assertions below are autogenerated (see the NOTE at the top of the file).
13860 define void @s_shuffle_v4i64_v3i64__5_5_1_4() {
13861 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_4:
13863 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13864 ; GFX900-NEXT: ;;#ASMSTART
13865 ; GFX900-NEXT: ; def s[4:9]
13866 ; GFX900-NEXT: ;;#ASMEND
13867 ; GFX900-NEXT: ;;#ASMSTART
13868 ; GFX900-NEXT: ; def s[12:17]
13869 ; GFX900-NEXT: ;;#ASMEND
13870 ; GFX900-NEXT: s_mov_b32 s8, s16
13871 ; GFX900-NEXT: s_mov_b32 s9, s17
13872 ; GFX900-NEXT: s_mov_b32 s10, s16
13873 ; GFX900-NEXT: s_mov_b32 s11, s17
13874 ; GFX900-NEXT: s_mov_b32 s12, s6
13875 ; GFX900-NEXT: s_mov_b32 s13, s7
13876 ; GFX900-NEXT: ;;#ASMSTART
13877 ; GFX900-NEXT: ; use s[8:15]
13878 ; GFX900-NEXT: ;;#ASMEND
13879 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13881 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_4:
13883 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13884 ; GFX90A-NEXT: ;;#ASMSTART
13885 ; GFX90A-NEXT: ; def s[4:9]
13886 ; GFX90A-NEXT: ;;#ASMEND
13887 ; GFX90A-NEXT: ;;#ASMSTART
13888 ; GFX90A-NEXT: ; def s[12:17]
13889 ; GFX90A-NEXT: ;;#ASMEND
13890 ; GFX90A-NEXT: s_mov_b32 s8, s16
13891 ; GFX90A-NEXT: s_mov_b32 s9, s17
13892 ; GFX90A-NEXT: s_mov_b32 s10, s16
13893 ; GFX90A-NEXT: s_mov_b32 s11, s17
13894 ; GFX90A-NEXT: s_mov_b32 s12, s6
13895 ; GFX90A-NEXT: s_mov_b32 s13, s7
13896 ; GFX90A-NEXT: ;;#ASMSTART
13897 ; GFX90A-NEXT: ; use s[8:15]
13898 ; GFX90A-NEXT: ;;#ASMEND
13899 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13901 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_4:
13903 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13904 ; GFX940-NEXT: ;;#ASMSTART
13905 ; GFX940-NEXT: ; def s[12:17]
13906 ; GFX940-NEXT: ;;#ASMEND
13907 ; GFX940-NEXT: ;;#ASMSTART
13908 ; GFX940-NEXT: ; def s[0:5]
13909 ; GFX940-NEXT: ;;#ASMEND
13910 ; GFX940-NEXT: s_mov_b32 s8, s16
13911 ; GFX940-NEXT: s_mov_b32 s9, s17
13912 ; GFX940-NEXT: s_mov_b32 s10, s16
13913 ; GFX940-NEXT: s_mov_b32 s11, s17
13914 ; GFX940-NEXT: s_mov_b32 s12, s2
13915 ; GFX940-NEXT: s_mov_b32 s13, s3
13916 ; GFX940-NEXT: ;;#ASMSTART
13917 ; GFX940-NEXT: ; use s[8:15]
13918 ; GFX940-NEXT: ;;#ASMEND
13919 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13920 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13921 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13922 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 4>
13923 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64>.
; Mask <5, 5, 2, 4>: mask indices 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec1[2], %vec1[2], %vec0[2], %vec1[1] }.
; The "{s[8:15]}" constraint pins the shuffled value to s[8:15] for the use.
; The assertions below are autogenerated (see the NOTE at the top of the file).
13927 define void @s_shuffle_v4i64_v3i64__5_5_2_4() {
13928 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_4:
13930 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13931 ; GFX900-NEXT: ;;#ASMSTART
13932 ; GFX900-NEXT: ; def s[8:13]
13933 ; GFX900-NEXT: ;;#ASMEND
13934 ; GFX900-NEXT: ;;#ASMSTART
13935 ; GFX900-NEXT: ; def s[16:21]
13936 ; GFX900-NEXT: ;;#ASMEND
13937 ; GFX900-NEXT: s_mov_b32 s8, s20
13938 ; GFX900-NEXT: s_mov_b32 s9, s21
13939 ; GFX900-NEXT: s_mov_b32 s10, s20
13940 ; GFX900-NEXT: s_mov_b32 s11, s21
13941 ; GFX900-NEXT: s_mov_b32 s14, s18
13942 ; GFX900-NEXT: s_mov_b32 s15, s19
13943 ; GFX900-NEXT: ;;#ASMSTART
13944 ; GFX900-NEXT: ; use s[8:15]
13945 ; GFX900-NEXT: ;;#ASMEND
13946 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13948 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_4:
13950 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13951 ; GFX90A-NEXT: ;;#ASMSTART
13952 ; GFX90A-NEXT: ; def s[8:13]
13953 ; GFX90A-NEXT: ;;#ASMEND
13954 ; GFX90A-NEXT: ;;#ASMSTART
13955 ; GFX90A-NEXT: ; def s[16:21]
13956 ; GFX90A-NEXT: ;;#ASMEND
13957 ; GFX90A-NEXT: s_mov_b32 s8, s20
13958 ; GFX90A-NEXT: s_mov_b32 s9, s21
13959 ; GFX90A-NEXT: s_mov_b32 s10, s20
13960 ; GFX90A-NEXT: s_mov_b32 s11, s21
13961 ; GFX90A-NEXT: s_mov_b32 s14, s18
13962 ; GFX90A-NEXT: s_mov_b32 s15, s19
13963 ; GFX90A-NEXT: ;;#ASMSTART
13964 ; GFX90A-NEXT: ; use s[8:15]
13965 ; GFX90A-NEXT: ;;#ASMEND
13966 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13968 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_4:
13970 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13971 ; GFX940-NEXT: ;;#ASMSTART
13972 ; GFX940-NEXT: ; def s[8:13]
13973 ; GFX940-NEXT: ;;#ASMEND
13974 ; GFX940-NEXT: ;;#ASMSTART
13975 ; GFX940-NEXT: ; def s[0:5]
13976 ; GFX940-NEXT: ;;#ASMEND
13977 ; GFX940-NEXT: s_mov_b32 s8, s4
13978 ; GFX940-NEXT: s_mov_b32 s9, s5
13979 ; GFX940-NEXT: s_mov_b32 s10, s4
13980 ; GFX940-NEXT: s_mov_b32 s11, s5
13981 ; GFX940-NEXT: s_mov_b32 s14, s2
13982 ; GFX940-NEXT: s_mov_b32 s15, s3
13983 ; GFX940-NEXT: ;;#ASMSTART
13984 ; GFX940-NEXT: ; use s[8:15]
13985 ; GFX940-NEXT: ;;#ASMEND
13986 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13987 %vec0 = call <3 x i64> asm "; def $0", "=s"()
13988 %vec1 = call <3 x i64> asm "; def $0", "=s"()
13989 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 4>
13990 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64>.
; Mask <5, 5, 3, 4>: every index is in the 3-5 range, so only %vec1 is read;
; the result is { %vec1[2], %vec1[2], %vec1[0], %vec1[1] }.
; The expected output accordingly contains a single "def" asm block.
; The "{s[8:15]}" constraint pins the shuffled value to s[8:15] for the use.
13994 define void @s_shuffle_v4i64_v3i64__5_5_3_4() {
13995 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_4:
13997 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13998 ; GFX900-NEXT: ;;#ASMSTART
13999 ; GFX900-NEXT: ; def s[12:17]
14000 ; GFX900-NEXT: ;;#ASMEND
14001 ; GFX900-NEXT: s_mov_b32 s8, s16
14002 ; GFX900-NEXT: s_mov_b32 s9, s17
14003 ; GFX900-NEXT: s_mov_b32 s10, s16
14004 ; GFX900-NEXT: s_mov_b32 s11, s17
14005 ; GFX900-NEXT: ;;#ASMSTART
14006 ; GFX900-NEXT: ; use s[8:15]
14007 ; GFX900-NEXT: ;;#ASMEND
14008 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14010 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_4:
14012 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14013 ; GFX90A-NEXT: ;;#ASMSTART
14014 ; GFX90A-NEXT: ; def s[12:17]
14015 ; GFX90A-NEXT: ;;#ASMEND
14016 ; GFX90A-NEXT: s_mov_b32 s8, s16
14017 ; GFX90A-NEXT: s_mov_b32 s9, s17
14018 ; GFX90A-NEXT: s_mov_b32 s10, s16
14019 ; GFX90A-NEXT: s_mov_b32 s11, s17
14020 ; GFX90A-NEXT: ;;#ASMSTART
14021 ; GFX90A-NEXT: ; use s[8:15]
14022 ; GFX90A-NEXT: ;;#ASMEND
14023 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14025 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_4:
14027 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14028 ; GFX940-NEXT: ;;#ASMSTART
14029 ; GFX940-NEXT: ; def s[0:5]
14030 ; GFX940-NEXT: ;;#ASMEND
14031 ; GFX940-NEXT: s_mov_b32 s8, s4
14032 ; GFX940-NEXT: s_mov_b32 s9, s5
14033 ; GFX940-NEXT: s_mov_b32 s10, s4
14034 ; GFX940-NEXT: s_mov_b32 s11, s5
14035 ; GFX940-NEXT: s_mov_b32 s12, s0
14036 ; GFX940-NEXT: s_mov_b32 s13, s1
14037 ; GFX940-NEXT: s_mov_b32 s14, s2
14038 ; GFX940-NEXT: s_mov_b32 s15, s3
14039 ; GFX940-NEXT: ;;#ASMSTART
14040 ; GFX940-NEXT: ; use s[8:15]
14041 ; GFX940-NEXT: ;;#ASMEND
14042 ; GFX940-NEXT: s_setpc_b64 s[30:31]
14043 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14044 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14045 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 4>
14046 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64>.
; Mask <poison, 5, 5, 5>: the defined indices all select %vec1[2], so only
; %vec1 is read; the result is { <poison>, %vec1[2], %vec1[2], %vec1[2] }.
; The expected output is checked under the common GFX9 prefix and contains
; a single "def" asm block.
; The "{s[8:15]}" constraint pins the shuffled value to s[8:15] for the use.
14050 define void @s_shuffle_v4i64_v3i64__u_5_5_5() {
14051 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_5_5_5:
14053 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14054 ; GFX9-NEXT: ;;#ASMSTART
14055 ; GFX9-NEXT: ; def s[8:13]
14056 ; GFX9-NEXT: ;;#ASMEND
14057 ; GFX9-NEXT: s_mov_b32 s10, s12
14058 ; GFX9-NEXT: s_mov_b32 s11, s13
14059 ; GFX9-NEXT: s_mov_b32 s14, s12
14060 ; GFX9-NEXT: s_mov_b32 s15, s13
14061 ; GFX9-NEXT: ;;#ASMSTART
14062 ; GFX9-NEXT: ; use s[8:15]
14063 ; GFX9-NEXT: ;;#ASMEND
14064 ; GFX9-NEXT: s_setpc_b64 s[30:31]
14065 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14066 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14067 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
14068 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64>.
; Mask <0, 5, 5, 5>: mask indices 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec0[0], %vec1[2], %vec1[2], %vec1[2] }.
; The "{s[8:15]}" constraint pins the shuffled value to s[8:15] for the use.
; The assertions below are autogenerated (see the NOTE at the top of the file).
14072 define void @s_shuffle_v4i64_v3i64__0_5_5_5() {
14073 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_5_5_5:
14075 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14076 ; GFX900-NEXT: ;;#ASMSTART
14077 ; GFX900-NEXT: ; def s[8:13]
14078 ; GFX900-NEXT: ;;#ASMEND
14079 ; GFX900-NEXT: ;;#ASMSTART
14080 ; GFX900-NEXT: ; def s[12:17]
14081 ; GFX900-NEXT: ;;#ASMEND
14082 ; GFX900-NEXT: s_mov_b32 s10, s16
14083 ; GFX900-NEXT: s_mov_b32 s11, s17
14084 ; GFX900-NEXT: s_mov_b32 s12, s16
14085 ; GFX900-NEXT: s_mov_b32 s13, s17
14086 ; GFX900-NEXT: s_mov_b32 s14, s16
14087 ; GFX900-NEXT: s_mov_b32 s15, s17
14088 ; GFX900-NEXT: ;;#ASMSTART
14089 ; GFX900-NEXT: ; use s[8:15]
14090 ; GFX900-NEXT: ;;#ASMEND
14091 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14093 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_5_5_5:
14095 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14096 ; GFX90A-NEXT: ;;#ASMSTART
14097 ; GFX90A-NEXT: ; def s[8:13]
14098 ; GFX90A-NEXT: ;;#ASMEND
14099 ; GFX90A-NEXT: ;;#ASMSTART
14100 ; GFX90A-NEXT: ; def s[12:17]
14101 ; GFX90A-NEXT: ;;#ASMEND
14102 ; GFX90A-NEXT: s_mov_b32 s10, s16
14103 ; GFX90A-NEXT: s_mov_b32 s11, s17
14104 ; GFX90A-NEXT: s_mov_b32 s12, s16
14105 ; GFX90A-NEXT: s_mov_b32 s13, s17
14106 ; GFX90A-NEXT: s_mov_b32 s14, s16
14107 ; GFX90A-NEXT: s_mov_b32 s15, s17
14108 ; GFX90A-NEXT: ;;#ASMSTART
14109 ; GFX90A-NEXT: ; use s[8:15]
14110 ; GFX90A-NEXT: ;;#ASMEND
14111 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14113 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__0_5_5_5:
14115 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14116 ; GFX940-NEXT: ;;#ASMSTART
14117 ; GFX940-NEXT: ; def s[8:13]
14118 ; GFX940-NEXT: ;;#ASMEND
14119 ; GFX940-NEXT: ;;#ASMSTART
14120 ; GFX940-NEXT: ; def s[0:5]
14121 ; GFX940-NEXT: ;;#ASMEND
14122 ; GFX940-NEXT: s_mov_b32 s10, s4
14123 ; GFX940-NEXT: s_mov_b32 s11, s5
14124 ; GFX940-NEXT: s_mov_b32 s12, s4
14125 ; GFX940-NEXT: s_mov_b32 s13, s5
14126 ; GFX940-NEXT: s_mov_b32 s14, s4
14127 ; GFX940-NEXT: s_mov_b32 s15, s5
14128 ; GFX940-NEXT: ;;#ASMSTART
14129 ; GFX940-NEXT: ; use s[8:15]
14130 ; GFX940-NEXT: ;;#ASMEND
14131 ; GFX940-NEXT: s_setpc_b64 s[30:31]
14132 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14133 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14134 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
14135 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64>.
; Mask <1, 5, 5, 5>: mask indices 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec0[1], %vec1[2], %vec1[2], %vec1[2] }.
; The "{s[8:15]}" constraint pins the shuffled value to s[8:15] for the use.
; The assertions below are autogenerated (see the NOTE at the top of the file).
14139 define void @s_shuffle_v4i64_v3i64__1_5_5_5() {
14140 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_5_5_5:
14142 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14143 ; GFX900-NEXT: ;;#ASMSTART
14144 ; GFX900-NEXT: ; def s[4:9]
14145 ; GFX900-NEXT: ;;#ASMEND
14146 ; GFX900-NEXT: ;;#ASMSTART
14147 ; GFX900-NEXT: ; def s[8:13]
14148 ; GFX900-NEXT: ;;#ASMEND
14149 ; GFX900-NEXT: s_mov_b32 s8, s6
14150 ; GFX900-NEXT: s_mov_b32 s9, s7
14151 ; GFX900-NEXT: s_mov_b32 s10, s12
14152 ; GFX900-NEXT: s_mov_b32 s11, s13
14153 ; GFX900-NEXT: s_mov_b32 s14, s12
14154 ; GFX900-NEXT: s_mov_b32 s15, s13
14155 ; GFX900-NEXT: ;;#ASMSTART
14156 ; GFX900-NEXT: ; use s[8:15]
14157 ; GFX900-NEXT: ;;#ASMEND
14158 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14160 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_5_5_5:
14162 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14163 ; GFX90A-NEXT: ;;#ASMSTART
14164 ; GFX90A-NEXT: ; def s[4:9]
14165 ; GFX90A-NEXT: ;;#ASMEND
14166 ; GFX90A-NEXT: ;;#ASMSTART
14167 ; GFX90A-NEXT: ; def s[8:13]
14168 ; GFX90A-NEXT: ;;#ASMEND
14169 ; GFX90A-NEXT: s_mov_b32 s8, s6
14170 ; GFX90A-NEXT: s_mov_b32 s9, s7
14171 ; GFX90A-NEXT: s_mov_b32 s10, s12
14172 ; GFX90A-NEXT: s_mov_b32 s11, s13
14173 ; GFX90A-NEXT: s_mov_b32 s14, s12
14174 ; GFX90A-NEXT: s_mov_b32 s15, s13
14175 ; GFX90A-NEXT: ;;#ASMSTART
14176 ; GFX90A-NEXT: ; use s[8:15]
14177 ; GFX90A-NEXT: ;;#ASMEND
14178 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14180 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_5_5_5:
14182 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14183 ; GFX940-NEXT: ;;#ASMSTART
14184 ; GFX940-NEXT: ; def s[8:13]
14185 ; GFX940-NEXT: ;;#ASMEND
14186 ; GFX940-NEXT: ;;#ASMSTART
14187 ; GFX940-NEXT: ; def s[0:5]
14188 ; GFX940-NEXT: ;;#ASMEND
14189 ; GFX940-NEXT: s_mov_b32 s8, s2
14190 ; GFX940-NEXT: s_mov_b32 s9, s3
14191 ; GFX940-NEXT: s_mov_b32 s10, s12
14192 ; GFX940-NEXT: s_mov_b32 s11, s13
14193 ; GFX940-NEXT: s_mov_b32 s14, s12
14194 ; GFX940-NEXT: s_mov_b32 s15, s13
14195 ; GFX940-NEXT: ;;#ASMSTART
14196 ; GFX940-NEXT: ; use s[8:15]
14197 ; GFX940-NEXT: ;;#ASMEND
14198 ; GFX940-NEXT: s_setpc_b64 s[30:31]
14199 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14200 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14201 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
14202 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64>.
; Mask <2, 5, 5, 5>: mask indices 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec0[2], %vec1[2], %vec1[2], %vec1[2] }.
; The "{s[8:15]}" constraint pins the shuffled value to s[8:15] for the use.
; The assertions below are autogenerated (see the NOTE at the top of the file).
14206 define void @s_shuffle_v4i64_v3i64__2_5_5_5() {
14207 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_5_5_5:
14209 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14210 ; GFX900-NEXT: ;;#ASMSTART
14211 ; GFX900-NEXT: ; def s[12:17]
14212 ; GFX900-NEXT: ;;#ASMEND
14213 ; GFX900-NEXT: ;;#ASMSTART
14214 ; GFX900-NEXT: ; def s[8:13]
14215 ; GFX900-NEXT: ;;#ASMEND
14216 ; GFX900-NEXT: s_mov_b32 s8, s16
14217 ; GFX900-NEXT: s_mov_b32 s9, s17
14218 ; GFX900-NEXT: s_mov_b32 s10, s12
14219 ; GFX900-NEXT: s_mov_b32 s11, s13
14220 ; GFX900-NEXT: s_mov_b32 s14, s12
14221 ; GFX900-NEXT: s_mov_b32 s15, s13
14222 ; GFX900-NEXT: ;;#ASMSTART
14223 ; GFX900-NEXT: ; use s[8:15]
14224 ; GFX900-NEXT: ;;#ASMEND
14225 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14227 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_5_5_5:
14229 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14230 ; GFX90A-NEXT: ;;#ASMSTART
14231 ; GFX90A-NEXT: ; def s[12:17]
14232 ; GFX90A-NEXT: ;;#ASMEND
14233 ; GFX90A-NEXT: ;;#ASMSTART
14234 ; GFX90A-NEXT: ; def s[8:13]
14235 ; GFX90A-NEXT: ;;#ASMEND
14236 ; GFX90A-NEXT: s_mov_b32 s8, s16
14237 ; GFX90A-NEXT: s_mov_b32 s9, s17
14238 ; GFX90A-NEXT: s_mov_b32 s10, s12
14239 ; GFX90A-NEXT: s_mov_b32 s11, s13
14240 ; GFX90A-NEXT: s_mov_b32 s14, s12
14241 ; GFX90A-NEXT: s_mov_b32 s15, s13
14242 ; GFX90A-NEXT: ;;#ASMSTART
14243 ; GFX90A-NEXT: ; use s[8:15]
14244 ; GFX90A-NEXT: ;;#ASMEND
14245 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14247 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_5_5_5:
14249 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14250 ; GFX940-NEXT: ;;#ASMSTART
14251 ; GFX940-NEXT: ; def s[8:13]
14252 ; GFX940-NEXT: ;;#ASMEND
14253 ; GFX940-NEXT: ;;#ASMSTART
14254 ; GFX940-NEXT: ; def s[0:5]
14255 ; GFX940-NEXT: ;;#ASMEND
14256 ; GFX940-NEXT: s_mov_b32 s8, s4
14257 ; GFX940-NEXT: s_mov_b32 s9, s5
14258 ; GFX940-NEXT: s_mov_b32 s10, s12
14259 ; GFX940-NEXT: s_mov_b32 s11, s13
14260 ; GFX940-NEXT: s_mov_b32 s14, s12
14261 ; GFX940-NEXT: s_mov_b32 s15, s13
14262 ; GFX940-NEXT: ;;#ASMSTART
14263 ; GFX940-NEXT: ; use s[8:15]
14264 ; GFX940-NEXT: ;;#ASMEND
14265 ; GFX940-NEXT: s_setpc_b64 s[30:31]
14266 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14267 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14268 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
14269 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64>.
; Mask <3, 5, 5, 5>: every index is in the 3-5 range, so only %vec1 is read;
; the result is { %vec1[0], %vec1[2], %vec1[2], %vec1[2] }.
; The expected output is checked under the common GFX9 prefix and contains
; a single "def" asm block.
; The "{s[8:15]}" constraint pins the shuffled value to s[8:15] for the use.
14273 define void @s_shuffle_v4i64_v3i64__3_5_5_5() {
14274 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_5_5_5:
14276 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14277 ; GFX9-NEXT: ;;#ASMSTART
14278 ; GFX9-NEXT: ; def s[8:13]
14279 ; GFX9-NEXT: ;;#ASMEND
14280 ; GFX9-NEXT: s_mov_b32 s10, s12
14281 ; GFX9-NEXT: s_mov_b32 s11, s13
14282 ; GFX9-NEXT: s_mov_b32 s14, s12
14283 ; GFX9-NEXT: s_mov_b32 s15, s13
14284 ; GFX9-NEXT: ;;#ASMSTART
14285 ; GFX9-NEXT: ; use s[8:15]
14286 ; GFX9-NEXT: ;;#ASMEND
14287 ; GFX9-NEXT: s_setpc_b64 s[30:31]
14288 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14289 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14290 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
14291 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64>.
; Mask <4, 5, 5, 5>: every index is in the 3-5 range, so only %vec1 is read;
; the result is { %vec1[1], %vec1[2], %vec1[2], %vec1[2] }.
; The expected output is checked under the common GFX9 prefix and contains
; a single "def" asm block.
; The "{s[8:15]}" constraint pins the shuffled value to s[8:15] for the use.
14295 define void @s_shuffle_v4i64_v3i64__4_5_5_5() {
14296 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_5_5_5:
14298 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14299 ; GFX9-NEXT: ;;#ASMSTART
14300 ; GFX9-NEXT: ; def s[8:13]
14301 ; GFX9-NEXT: ;;#ASMEND
14302 ; GFX9-NEXT: s_mov_b32 s8, s10
14303 ; GFX9-NEXT: s_mov_b32 s9, s11
14304 ; GFX9-NEXT: s_mov_b32 s10, s12
14305 ; GFX9-NEXT: s_mov_b32 s11, s13
14306 ; GFX9-NEXT: s_mov_b32 s14, s12
14307 ; GFX9-NEXT: s_mov_b32 s15, s13
14308 ; GFX9-NEXT: ;;#ASMSTART
14309 ; GFX9-NEXT: ; use s[8:15]
14310 ; GFX9-NEXT: ;;#ASMEND
14311 ; GFX9-NEXT: s_setpc_b64 s[30:31]
14312 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14313 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14314 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
14315 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64>.
; Mask <5, poison, 5, 5>: the defined indices all select %vec1[2], so only
; %vec1 is read; the result is { %vec1[2], <poison>, %vec1[2], %vec1[2] }.
; The expected output is checked under the common GFX9 prefix and contains
; a single "def" asm block.
; The "{s[8:15]}" constraint pins the shuffled value to s[8:15] for the use.
14319 define void @s_shuffle_v4i64_v3i64__5_u_5_5() {
14320 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_u_5_5:
14322 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14323 ; GFX9-NEXT: ;;#ASMSTART
14324 ; GFX9-NEXT: ; def s[8:13]
14325 ; GFX9-NEXT: ;;#ASMEND
14326 ; GFX9-NEXT: s_mov_b32 s8, s12
14327 ; GFX9-NEXT: s_mov_b32 s9, s13
14328 ; GFX9-NEXT: s_mov_b32 s14, s12
14329 ; GFX9-NEXT: s_mov_b32 s15, s13
14330 ; GFX9-NEXT: ;;#ASMSTART
14331 ; GFX9-NEXT: ; use s[8:15]
14332 ; GFX9-NEXT: ;;#ASMEND
14333 ; GFX9-NEXT: s_setpc_b64 s[30:31]
14334 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14335 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14336 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5>
14337 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64>.
; Mask <5, 0, 5, 5>: mask indices 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec1[2], %vec0[0], %vec1[2], %vec1[2] }.
; The "{s[8:15]}" constraint pins the shuffled value to s[8:15] for the use.
; The assertions below are autogenerated (see the NOTE at the top of the file).
14341 define void @s_shuffle_v4i64_v3i64__5_0_5_5() {
14342 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_5_5:
14344 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14345 ; GFX900-NEXT: ;;#ASMSTART
14346 ; GFX900-NEXT: ; def s[4:9]
14347 ; GFX900-NEXT: ;;#ASMEND
14348 ; GFX900-NEXT: ;;#ASMSTART
14349 ; GFX900-NEXT: ; def s[8:13]
14350 ; GFX900-NEXT: ;;#ASMEND
14351 ; GFX900-NEXT: s_mov_b32 s8, s12
14352 ; GFX900-NEXT: s_mov_b32 s9, s13
14353 ; GFX900-NEXT: s_mov_b32 s10, s4
14354 ; GFX900-NEXT: s_mov_b32 s11, s5
14355 ; GFX900-NEXT: s_mov_b32 s14, s12
14356 ; GFX900-NEXT: s_mov_b32 s15, s13
14357 ; GFX900-NEXT: ;;#ASMSTART
14358 ; GFX900-NEXT: ; use s[8:15]
14359 ; GFX900-NEXT: ;;#ASMEND
14360 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14362 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_5_5:
14364 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14365 ; GFX90A-NEXT: ;;#ASMSTART
14366 ; GFX90A-NEXT: ; def s[4:9]
14367 ; GFX90A-NEXT: ;;#ASMEND
14368 ; GFX90A-NEXT: ;;#ASMSTART
14369 ; GFX90A-NEXT: ; def s[8:13]
14370 ; GFX90A-NEXT: ;;#ASMEND
14371 ; GFX90A-NEXT: s_mov_b32 s8, s12
14372 ; GFX90A-NEXT: s_mov_b32 s9, s13
14373 ; GFX90A-NEXT: s_mov_b32 s10, s4
14374 ; GFX90A-NEXT: s_mov_b32 s11, s5
14375 ; GFX90A-NEXT: s_mov_b32 s14, s12
14376 ; GFX90A-NEXT: s_mov_b32 s15, s13
14377 ; GFX90A-NEXT: ;;#ASMSTART
14378 ; GFX90A-NEXT: ; use s[8:15]
14379 ; GFX90A-NEXT: ;;#ASMEND
14380 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14382 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_5_5:
14384 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14385 ; GFX940-NEXT: ;;#ASMSTART
14386 ; GFX940-NEXT: ; def s[8:13]
14387 ; GFX940-NEXT: ;;#ASMEND
14388 ; GFX940-NEXT: ;;#ASMSTART
14389 ; GFX940-NEXT: ; def s[0:5]
14390 ; GFX940-NEXT: ;;#ASMEND
14391 ; GFX940-NEXT: s_mov_b32 s8, s12
14392 ; GFX940-NEXT: s_mov_b32 s9, s13
14393 ; GFX940-NEXT: s_mov_b32 s10, s0
14394 ; GFX940-NEXT: s_mov_b32 s11, s1
14395 ; GFX940-NEXT: s_mov_b32 s14, s12
14396 ; GFX940-NEXT: s_mov_b32 s15, s13
14397 ; GFX940-NEXT: ;;#ASMSTART
14398 ; GFX940-NEXT: ; use s[8:15]
14399 ; GFX940-NEXT: ;;#ASMEND
14400 ; GFX940-NEXT: s_setpc_b64 s[30:31]
14401 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14402 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14403 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 5, i32 5>
14404 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; SGPR shuffle of two <3 x i64> inline-asm values into a <4 x i64>.
; Mask <5, 1, 5, 5>: mask indices 0-2 select from %vec0 and 3-5 from %vec1,
; so the result is { %vec1[2], %vec0[1], %vec1[2], %vec1[2] }.
; The "{s[8:15]}" constraint pins the shuffled value to s[8:15] for the use.
; The assertions below are autogenerated (see the NOTE at the top of the file).
14408 define void @s_shuffle_v4i64_v3i64__5_1_5_5() {
14409 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_5_5:
14411 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14412 ; GFX900-NEXT: ;;#ASMSTART
14413 ; GFX900-NEXT: ; def s[8:13]
14414 ; GFX900-NEXT: ;;#ASMEND
14415 ; GFX900-NEXT: ;;#ASMSTART
14416 ; GFX900-NEXT: ; def s[12:17]
14417 ; GFX900-NEXT: ;;#ASMEND
14418 ; GFX900-NEXT: s_mov_b32 s8, s16
14419 ; GFX900-NEXT: s_mov_b32 s9, s17
14420 ; GFX900-NEXT: s_mov_b32 s12, s16
14421 ; GFX900-NEXT: s_mov_b32 s13, s17
14422 ; GFX900-NEXT: s_mov_b32 s14, s16
14423 ; GFX900-NEXT: s_mov_b32 s15, s17
14424 ; GFX900-NEXT: ;;#ASMSTART
14425 ; GFX900-NEXT: ; use s[8:15]
14426 ; GFX900-NEXT: ;;#ASMEND
14427 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14429 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_5_5:
14431 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14432 ; GFX90A-NEXT: ;;#ASMSTART
14433 ; GFX90A-NEXT: ; def s[8:13]
14434 ; GFX90A-NEXT: ;;#ASMEND
14435 ; GFX90A-NEXT: ;;#ASMSTART
14436 ; GFX90A-NEXT: ; def s[12:17]
14437 ; GFX90A-NEXT: ;;#ASMEND
14438 ; GFX90A-NEXT: s_mov_b32 s8, s16
14439 ; GFX90A-NEXT: s_mov_b32 s9, s17
14440 ; GFX90A-NEXT: s_mov_b32 s12, s16
14441 ; GFX90A-NEXT: s_mov_b32 s13, s17
14442 ; GFX90A-NEXT: s_mov_b32 s14, s16
14443 ; GFX90A-NEXT: s_mov_b32 s15, s17
14444 ; GFX90A-NEXT: ;;#ASMSTART
14445 ; GFX90A-NEXT: ; use s[8:15]
14446 ; GFX90A-NEXT: ;;#ASMEND
14447 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14449 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_5_5:
14451 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14452 ; GFX940-NEXT: ;;#ASMSTART
14453 ; GFX940-NEXT: ; def s[8:13]
14454 ; GFX940-NEXT: ;;#ASMEND
14455 ; GFX940-NEXT: ;;#ASMSTART
14456 ; GFX940-NEXT: ; def s[0:5]
14457 ; GFX940-NEXT: ;;#ASMEND
14458 ; GFX940-NEXT: s_mov_b32 s8, s4
14459 ; GFX940-NEXT: s_mov_b32 s9, s5
14460 ; GFX940-NEXT: s_mov_b32 s12, s4
14461 ; GFX940-NEXT: s_mov_b32 s13, s5
14462 ; GFX940-NEXT: s_mov_b32 s14, s4
14463 ; GFX940-NEXT: s_mov_b32 s15, s5
14464 ; GFX940-NEXT: ;;#ASMSTART
14465 ; GFX940-NEXT: ; use s[8:15]
14466 ; GFX940-NEXT: ;;#ASMEND
14467 ; GFX940-NEXT: s_setpc_b64 s[30:31]
14468 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14469 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14470 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 5, i32 5>
14471 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle of two <3 x i64> inline-asm results into a <4 x i64>
; using mask <5, 2, 5, 5>. Mask indices 0-2 select lanes of %vec0 and 3-5
; select lanes of %vec1, so the result is
; { %vec1[2], %vec0[2], %vec1[2], %vec1[2] }.
14475 define void @s_shuffle_v4i64_v3i64__5_2_5_5() {
14476 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_5_5:
14478 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14479 ; GFX900-NEXT: ;;#ASMSTART
14480 ; GFX900-NEXT: ; def s[12:17]
14481 ; GFX900-NEXT: ;;#ASMEND
14482 ; GFX900-NEXT: ;;#ASMSTART
14483 ; GFX900-NEXT: ; def s[8:13]
14484 ; GFX900-NEXT: ;;#ASMEND
14485 ; GFX900-NEXT: s_mov_b32 s8, s12
14486 ; GFX900-NEXT: s_mov_b32 s9, s13
14487 ; GFX900-NEXT: s_mov_b32 s10, s16
14488 ; GFX900-NEXT: s_mov_b32 s11, s17
14489 ; GFX900-NEXT: s_mov_b32 s14, s12
14490 ; GFX900-NEXT: s_mov_b32 s15, s13
14491 ; GFX900-NEXT: ;;#ASMSTART
14492 ; GFX900-NEXT: ; use s[8:15]
14493 ; GFX900-NEXT: ;;#ASMEND
14494 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14496 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_5_5:
14498 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14499 ; GFX90A-NEXT: ;;#ASMSTART
14500 ; GFX90A-NEXT: ; def s[12:17]
14501 ; GFX90A-NEXT: ;;#ASMEND
14502 ; GFX90A-NEXT: ;;#ASMSTART
14503 ; GFX90A-NEXT: ; def s[8:13]
14504 ; GFX90A-NEXT: ;;#ASMEND
14505 ; GFX90A-NEXT: s_mov_b32 s8, s12
14506 ; GFX90A-NEXT: s_mov_b32 s9, s13
14507 ; GFX90A-NEXT: s_mov_b32 s10, s16
14508 ; GFX90A-NEXT: s_mov_b32 s11, s17
14509 ; GFX90A-NEXT: s_mov_b32 s14, s12
14510 ; GFX90A-NEXT: s_mov_b32 s15, s13
14511 ; GFX90A-NEXT: ;;#ASMSTART
14512 ; GFX90A-NEXT: ; use s[8:15]
14513 ; GFX90A-NEXT: ;;#ASMEND
14514 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14516 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_5_5:
14518 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14519 ; GFX940-NEXT: ;;#ASMSTART
14520 ; GFX940-NEXT: ; def s[8:13]
14521 ; GFX940-NEXT: ;;#ASMEND
14522 ; GFX940-NEXT: ;;#ASMSTART
14523 ; GFX940-NEXT: ; def s[0:5]
14524 ; GFX940-NEXT: ;;#ASMEND
14525 ; GFX940-NEXT: s_mov_b32 s8, s12
14526 ; GFX940-NEXT: s_mov_b32 s9, s13
14527 ; GFX940-NEXT: s_mov_b32 s10, s4
14528 ; GFX940-NEXT: s_mov_b32 s11, s5
14529 ; GFX940-NEXT: s_mov_b32 s14, s12
14530 ; GFX940-NEXT: s_mov_b32 s15, s13
14531 ; GFX940-NEXT: ;;#ASMSTART
14532 ; GFX940-NEXT: ; use s[8:15]
14533 ; GFX940-NEXT: ;;#ASMEND
14534 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from inline asm with an "=s" output constraint; the
; shuffled vector is consumed by inline asm whose operand is pinned to s[8:15].
14535 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14536 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14537 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 5, i32 5>
14538 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle of two <3 x i64> inline-asm results into a <4 x i64>
; using mask <5, 3, 5, 5>. Mask indices 3-5 select lanes of %vec1, so the
; result is { %vec1[2], %vec1[0], %vec1[2], %vec1[2] }. The mask never
; references %vec0, and each expected sequence below contains a single
; "; def" marker.
14542 define void @s_shuffle_v4i64_v3i64__5_3_5_5() {
14543 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_5_5:
14545 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14546 ; GFX900-NEXT: ;;#ASMSTART
14547 ; GFX900-NEXT: ; def s[12:17]
14548 ; GFX900-NEXT: ;;#ASMEND
14549 ; GFX900-NEXT: s_mov_b32 s8, s16
14550 ; GFX900-NEXT: s_mov_b32 s9, s17
14551 ; GFX900-NEXT: s_mov_b32 s10, s12
14552 ; GFX900-NEXT: s_mov_b32 s11, s13
14553 ; GFX900-NEXT: s_mov_b32 s12, s16
14554 ; GFX900-NEXT: s_mov_b32 s13, s17
14555 ; GFX900-NEXT: s_mov_b32 s14, s16
14556 ; GFX900-NEXT: s_mov_b32 s15, s17
14557 ; GFX900-NEXT: ;;#ASMSTART
14558 ; GFX900-NEXT: ; use s[8:15]
14559 ; GFX900-NEXT: ;;#ASMEND
14560 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14562 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_5_5:
14564 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14565 ; GFX90A-NEXT: ;;#ASMSTART
14566 ; GFX90A-NEXT: ; def s[12:17]
14567 ; GFX90A-NEXT: ;;#ASMEND
14568 ; GFX90A-NEXT: s_mov_b32 s8, s16
14569 ; GFX90A-NEXT: s_mov_b32 s9, s17
14570 ; GFX90A-NEXT: s_mov_b32 s10, s12
14571 ; GFX90A-NEXT: s_mov_b32 s11, s13
14572 ; GFX90A-NEXT: s_mov_b32 s12, s16
14573 ; GFX90A-NEXT: s_mov_b32 s13, s17
14574 ; GFX90A-NEXT: s_mov_b32 s14, s16
14575 ; GFX90A-NEXT: s_mov_b32 s15, s17
14576 ; GFX90A-NEXT: ;;#ASMSTART
14577 ; GFX90A-NEXT: ; use s[8:15]
14578 ; GFX90A-NEXT: ;;#ASMEND
14579 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14581 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_5_5:
14583 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14584 ; GFX940-NEXT: ;;#ASMSTART
14585 ; GFX940-NEXT: ; def s[0:5]
14586 ; GFX940-NEXT: ;;#ASMEND
14587 ; GFX940-NEXT: s_mov_b32 s8, s4
14588 ; GFX940-NEXT: s_mov_b32 s9, s5
14589 ; GFX940-NEXT: s_mov_b32 s10, s0
14590 ; GFX940-NEXT: s_mov_b32 s11, s1
14591 ; GFX940-NEXT: s_mov_b32 s12, s4
14592 ; GFX940-NEXT: s_mov_b32 s13, s5
14593 ; GFX940-NEXT: s_mov_b32 s14, s4
14594 ; GFX940-NEXT: s_mov_b32 s15, s5
14595 ; GFX940-NEXT: ;;#ASMSTART
14596 ; GFX940-NEXT: ; use s[8:15]
14597 ; GFX940-NEXT: ;;#ASMEND
14598 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from inline asm with an "=s" output constraint; the
; shuffled vector is consumed by inline asm whose operand is pinned to s[8:15].
14599 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14600 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14601 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 5, i32 5>
14602 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle of two <3 x i64> inline-asm results into a <4 x i64>
; using mask <5, 4, 5, 5>. Mask indices 3-5 select lanes of %vec1, so the
; result is { %vec1[2], %vec1[1], %vec1[2], %vec1[2] }. The mask never
; references %vec0, and the expected sequence contains a single "; def"
; marker. The expectations here are written once under the common GFX9 prefix
; rather than per subtarget.
14606 define void @s_shuffle_v4i64_v3i64__5_4_5_5() {
14607 ; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_5_5:
14609 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14610 ; GFX9-NEXT: ;;#ASMSTART
14611 ; GFX9-NEXT: ; def s[8:13]
14612 ; GFX9-NEXT: ;;#ASMEND
14613 ; GFX9-NEXT: s_mov_b32 s8, s12
14614 ; GFX9-NEXT: s_mov_b32 s9, s13
14615 ; GFX9-NEXT: s_mov_b32 s14, s12
14616 ; GFX9-NEXT: s_mov_b32 s15, s13
14617 ; GFX9-NEXT: ;;#ASMSTART
14618 ; GFX9-NEXT: ; use s[8:15]
14619 ; GFX9-NEXT: ;;#ASMEND
14620 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; Both sources come from inline asm with an "=s" output constraint; the
; shuffled vector is consumed by inline asm whose operand is pinned to s[8:15].
14621 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14622 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14623 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5>
14624 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle of two <3 x i64> inline-asm results into a <4 x i64>
; using mask <5, 5, poison, 5>. Result lanes 0, 1 and 3 are all %vec1[2];
; lane 2 is left unspecified by the poison mask element. The mask never
; references %vec0, each expected sequence contains a single "; def" marker,
; and none of the expected sequences has an s_mov_b32 into s12 or s13.
14628 define void @s_shuffle_v4i64_v3i64__5_5_u_5() {
14629 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5:
14631 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14632 ; GFX900-NEXT: ;;#ASMSTART
14633 ; GFX900-NEXT: ; def s[8:13]
14634 ; GFX900-NEXT: ;;#ASMEND
14635 ; GFX900-NEXT: s_mov_b32 s8, s12
14636 ; GFX900-NEXT: s_mov_b32 s9, s13
14637 ; GFX900-NEXT: s_mov_b32 s10, s12
14638 ; GFX900-NEXT: s_mov_b32 s11, s13
14639 ; GFX900-NEXT: s_mov_b32 s14, s12
14640 ; GFX900-NEXT: s_mov_b32 s15, s13
14641 ; GFX900-NEXT: ;;#ASMSTART
14642 ; GFX900-NEXT: ; use s[8:15]
14643 ; GFX900-NEXT: ;;#ASMEND
14644 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14646 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5:
14648 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14649 ; GFX90A-NEXT: ;;#ASMSTART
14650 ; GFX90A-NEXT: ; def s[8:13]
14651 ; GFX90A-NEXT: ;;#ASMEND
14652 ; GFX90A-NEXT: s_mov_b32 s8, s12
14653 ; GFX90A-NEXT: s_mov_b32 s9, s13
14654 ; GFX90A-NEXT: s_mov_b32 s10, s12
14655 ; GFX90A-NEXT: s_mov_b32 s11, s13
14656 ; GFX90A-NEXT: s_mov_b32 s14, s12
14657 ; GFX90A-NEXT: s_mov_b32 s15, s13
14658 ; GFX90A-NEXT: ;;#ASMSTART
14659 ; GFX90A-NEXT: ; use s[8:15]
14660 ; GFX90A-NEXT: ;;#ASMEND
14661 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14663 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5:
14665 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14666 ; GFX940-NEXT: ;;#ASMSTART
14667 ; GFX940-NEXT: ; def s[0:5]
14668 ; GFX940-NEXT: ;;#ASMEND
14669 ; GFX940-NEXT: s_mov_b32 s8, s4
14670 ; GFX940-NEXT: s_mov_b32 s9, s5
14671 ; GFX940-NEXT: s_mov_b32 s10, s4
14672 ; GFX940-NEXT: s_mov_b32 s11, s5
14673 ; GFX940-NEXT: s_mov_b32 s14, s4
14674 ; GFX940-NEXT: s_mov_b32 s15, s5
14675 ; GFX940-NEXT: ;;#ASMSTART
14676 ; GFX940-NEXT: ; use s[8:15]
14677 ; GFX940-NEXT: ;;#ASMEND
14678 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from inline asm with an "=s" output constraint; the
; shuffled vector is consumed by inline asm whose operand is pinned to s[8:15].
14679 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14680 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14681 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 5>
14682 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle of two <3 x i64> inline-asm results into a <4 x i64>
; using mask <5, 5, 0, 5>. Mask indices 0-2 select lanes of %vec0 and 3-5
; select lanes of %vec1, so the result is
; { %vec1[2], %vec1[2], %vec0[0], %vec1[2] }.
14686 define void @s_shuffle_v4i64_v3i64__5_5_0_5() {
14687 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_5:
14689 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14690 ; GFX900-NEXT: ;;#ASMSTART
14691 ; GFX900-NEXT: ; def s[4:9]
14692 ; GFX900-NEXT: ;;#ASMEND
14693 ; GFX900-NEXT: ;;#ASMSTART
14694 ; GFX900-NEXT: ; def s[12:17]
14695 ; GFX900-NEXT: ;;#ASMEND
14696 ; GFX900-NEXT: s_mov_b32 s8, s16
14697 ; GFX900-NEXT: s_mov_b32 s9, s17
14698 ; GFX900-NEXT: s_mov_b32 s10, s16
14699 ; GFX900-NEXT: s_mov_b32 s11, s17
14700 ; GFX900-NEXT: s_mov_b32 s12, s4
14701 ; GFX900-NEXT: s_mov_b32 s13, s5
14702 ; GFX900-NEXT: s_mov_b32 s14, s16
14703 ; GFX900-NEXT: s_mov_b32 s15, s17
14704 ; GFX900-NEXT: ;;#ASMSTART
14705 ; GFX900-NEXT: ; use s[8:15]
14706 ; GFX900-NEXT: ;;#ASMEND
14707 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14709 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_5:
14711 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14712 ; GFX90A-NEXT: ;;#ASMSTART
14713 ; GFX90A-NEXT: ; def s[4:9]
14714 ; GFX90A-NEXT: ;;#ASMEND
14715 ; GFX90A-NEXT: ;;#ASMSTART
14716 ; GFX90A-NEXT: ; def s[12:17]
14717 ; GFX90A-NEXT: ;;#ASMEND
14718 ; GFX90A-NEXT: s_mov_b32 s8, s16
14719 ; GFX90A-NEXT: s_mov_b32 s9, s17
14720 ; GFX90A-NEXT: s_mov_b32 s10, s16
14721 ; GFX90A-NEXT: s_mov_b32 s11, s17
14722 ; GFX90A-NEXT: s_mov_b32 s12, s4
14723 ; GFX90A-NEXT: s_mov_b32 s13, s5
14724 ; GFX90A-NEXT: s_mov_b32 s14, s16
14725 ; GFX90A-NEXT: s_mov_b32 s15, s17
14726 ; GFX90A-NEXT: ;;#ASMSTART
14727 ; GFX90A-NEXT: ; use s[8:15]
14728 ; GFX90A-NEXT: ;;#ASMEND
14729 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14731 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_5:
14733 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14734 ; GFX940-NEXT: ;;#ASMSTART
14735 ; GFX940-NEXT: ; def s[12:17]
14736 ; GFX940-NEXT: ;;#ASMEND
14737 ; GFX940-NEXT: ;;#ASMSTART
14738 ; GFX940-NEXT: ; def s[0:5]
14739 ; GFX940-NEXT: ;;#ASMEND
14740 ; GFX940-NEXT: s_mov_b32 s8, s16
14741 ; GFX940-NEXT: s_mov_b32 s9, s17
14742 ; GFX940-NEXT: s_mov_b32 s10, s16
14743 ; GFX940-NEXT: s_mov_b32 s11, s17
14744 ; GFX940-NEXT: s_mov_b32 s12, s0
14745 ; GFX940-NEXT: s_mov_b32 s13, s1
14746 ; GFX940-NEXT: s_mov_b32 s14, s16
14747 ; GFX940-NEXT: s_mov_b32 s15, s17
14748 ; GFX940-NEXT: ;;#ASMSTART
14749 ; GFX940-NEXT: ; use s[8:15]
14750 ; GFX940-NEXT: ;;#ASMEND
14751 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from inline asm with an "=s" output constraint; the
; shuffled vector is consumed by inline asm whose operand is pinned to s[8:15].
14752 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14753 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14754 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 5>
14755 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle of two <3 x i64> inline-asm results into a <4 x i64>
; using mask <5, 5, 1, 5>. Mask indices 0-2 select lanes of %vec0 and 3-5
; select lanes of %vec1, so the result is
; { %vec1[2], %vec1[2], %vec0[1], %vec1[2] }.
14759 define void @s_shuffle_v4i64_v3i64__5_5_1_5() {
14760 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_5:
14762 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14763 ; GFX900-NEXT: ;;#ASMSTART
14764 ; GFX900-NEXT: ; def s[4:9]
14765 ; GFX900-NEXT: ;;#ASMEND
14766 ; GFX900-NEXT: ;;#ASMSTART
14767 ; GFX900-NEXT: ; def s[12:17]
14768 ; GFX900-NEXT: ;;#ASMEND
14769 ; GFX900-NEXT: s_mov_b32 s8, s16
14770 ; GFX900-NEXT: s_mov_b32 s9, s17
14771 ; GFX900-NEXT: s_mov_b32 s10, s16
14772 ; GFX900-NEXT: s_mov_b32 s11, s17
14773 ; GFX900-NEXT: s_mov_b32 s12, s6
14774 ; GFX900-NEXT: s_mov_b32 s13, s7
14775 ; GFX900-NEXT: s_mov_b32 s14, s16
14776 ; GFX900-NEXT: s_mov_b32 s15, s17
14777 ; GFX900-NEXT: ;;#ASMSTART
14778 ; GFX900-NEXT: ; use s[8:15]
14779 ; GFX900-NEXT: ;;#ASMEND
14780 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14782 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_5:
14784 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14785 ; GFX90A-NEXT: ;;#ASMSTART
14786 ; GFX90A-NEXT: ; def s[4:9]
14787 ; GFX90A-NEXT: ;;#ASMEND
14788 ; GFX90A-NEXT: ;;#ASMSTART
14789 ; GFX90A-NEXT: ; def s[12:17]
14790 ; GFX90A-NEXT: ;;#ASMEND
14791 ; GFX90A-NEXT: s_mov_b32 s8, s16
14792 ; GFX90A-NEXT: s_mov_b32 s9, s17
14793 ; GFX90A-NEXT: s_mov_b32 s10, s16
14794 ; GFX90A-NEXT: s_mov_b32 s11, s17
14795 ; GFX90A-NEXT: s_mov_b32 s12, s6
14796 ; GFX90A-NEXT: s_mov_b32 s13, s7
14797 ; GFX90A-NEXT: s_mov_b32 s14, s16
14798 ; GFX90A-NEXT: s_mov_b32 s15, s17
14799 ; GFX90A-NEXT: ;;#ASMSTART
14800 ; GFX90A-NEXT: ; use s[8:15]
14801 ; GFX90A-NEXT: ;;#ASMEND
14802 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14804 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_5:
14806 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14807 ; GFX940-NEXT: ;;#ASMSTART
14808 ; GFX940-NEXT: ; def s[12:17]
14809 ; GFX940-NEXT: ;;#ASMEND
14810 ; GFX940-NEXT: ;;#ASMSTART
14811 ; GFX940-NEXT: ; def s[0:5]
14812 ; GFX940-NEXT: ;;#ASMEND
14813 ; GFX940-NEXT: s_mov_b32 s8, s16
14814 ; GFX940-NEXT: s_mov_b32 s9, s17
14815 ; GFX940-NEXT: s_mov_b32 s10, s16
14816 ; GFX940-NEXT: s_mov_b32 s11, s17
14817 ; GFX940-NEXT: s_mov_b32 s12, s2
14818 ; GFX940-NEXT: s_mov_b32 s13, s3
14819 ; GFX940-NEXT: s_mov_b32 s14, s16
14820 ; GFX940-NEXT: s_mov_b32 s15, s17
14821 ; GFX940-NEXT: ;;#ASMSTART
14822 ; GFX940-NEXT: ; use s[8:15]
14823 ; GFX940-NEXT: ;;#ASMEND
14824 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from inline asm with an "=s" output constraint; the
; shuffled vector is consumed by inline asm whose operand is pinned to s[8:15].
14825 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14826 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14827 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 5>
14828 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle of two <3 x i64> inline-asm results into a <4 x i64>
; using mask <5, 5, 2, 5>. Mask indices 0-2 select lanes of %vec0 and 3-5
; select lanes of %vec1, so the result is
; { %vec1[2], %vec1[2], %vec0[2], %vec1[2] }.
14832 define void @s_shuffle_v4i64_v3i64__5_5_2_5() {
14833 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_5:
14835 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14836 ; GFX900-NEXT: ;;#ASMSTART
14837 ; GFX900-NEXT: ; def s[8:13]
14838 ; GFX900-NEXT: ;;#ASMEND
14839 ; GFX900-NEXT: ;;#ASMSTART
14840 ; GFX900-NEXT: ; def s[16:21]
14841 ; GFX900-NEXT: ;;#ASMEND
14842 ; GFX900-NEXT: s_mov_b32 s8, s20
14843 ; GFX900-NEXT: s_mov_b32 s9, s21
14844 ; GFX900-NEXT: s_mov_b32 s10, s20
14845 ; GFX900-NEXT: s_mov_b32 s11, s21
14846 ; GFX900-NEXT: s_mov_b32 s14, s20
14847 ; GFX900-NEXT: s_mov_b32 s15, s21
14848 ; GFX900-NEXT: ;;#ASMSTART
14849 ; GFX900-NEXT: ; use s[8:15]
14850 ; GFX900-NEXT: ;;#ASMEND
14851 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14853 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_5:
14855 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14856 ; GFX90A-NEXT: ;;#ASMSTART
14857 ; GFX90A-NEXT: ; def s[8:13]
14858 ; GFX90A-NEXT: ;;#ASMEND
14859 ; GFX90A-NEXT: ;;#ASMSTART
14860 ; GFX90A-NEXT: ; def s[16:21]
14861 ; GFX90A-NEXT: ;;#ASMEND
14862 ; GFX90A-NEXT: s_mov_b32 s8, s20
14863 ; GFX90A-NEXT: s_mov_b32 s9, s21
14864 ; GFX90A-NEXT: s_mov_b32 s10, s20
14865 ; GFX90A-NEXT: s_mov_b32 s11, s21
14866 ; GFX90A-NEXT: s_mov_b32 s14, s20
14867 ; GFX90A-NEXT: s_mov_b32 s15, s21
14868 ; GFX90A-NEXT: ;;#ASMSTART
14869 ; GFX90A-NEXT: ; use s[8:15]
14870 ; GFX90A-NEXT: ;;#ASMEND
14871 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14873 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_5:
14875 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14876 ; GFX940-NEXT: ;;#ASMSTART
14877 ; GFX940-NEXT: ; def s[8:13]
14878 ; GFX940-NEXT: ;;#ASMEND
14879 ; GFX940-NEXT: ;;#ASMSTART
14880 ; GFX940-NEXT: ; def s[0:5]
14881 ; GFX940-NEXT: ;;#ASMEND
14882 ; GFX940-NEXT: s_mov_b32 s8, s4
14883 ; GFX940-NEXT: s_mov_b32 s9, s5
14884 ; GFX940-NEXT: s_mov_b32 s10, s4
14885 ; GFX940-NEXT: s_mov_b32 s11, s5
14886 ; GFX940-NEXT: s_mov_b32 s14, s4
14887 ; GFX940-NEXT: s_mov_b32 s15, s5
14888 ; GFX940-NEXT: ;;#ASMSTART
14889 ; GFX940-NEXT: ; use s[8:15]
14890 ; GFX940-NEXT: ;;#ASMEND
14891 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from inline asm with an "=s" output constraint; the
; shuffled vector is consumed by inline asm whose operand is pinned to s[8:15].
14892 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14893 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14894 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 5>
14895 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle of two <3 x i64> inline-asm results into a <4 x i64>
; using mask <5, 5, 3, 5>. Mask indices 3-5 select lanes of %vec1, so the
; result is { %vec1[2], %vec1[2], %vec1[0], %vec1[2] }. The mask never
; references %vec0, and each expected sequence below contains a single
; "; def" marker.
14899 define void @s_shuffle_v4i64_v3i64__5_5_3_5() {
14900 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_5:
14902 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14903 ; GFX900-NEXT: ;;#ASMSTART
14904 ; GFX900-NEXT: ; def s[12:17]
14905 ; GFX900-NEXT: ;;#ASMEND
14906 ; GFX900-NEXT: s_mov_b32 s8, s16
14907 ; GFX900-NEXT: s_mov_b32 s9, s17
14908 ; GFX900-NEXT: s_mov_b32 s10, s16
14909 ; GFX900-NEXT: s_mov_b32 s11, s17
14910 ; GFX900-NEXT: s_mov_b32 s14, s16
14911 ; GFX900-NEXT: s_mov_b32 s15, s17
14912 ; GFX900-NEXT: ;;#ASMSTART
14913 ; GFX900-NEXT: ; use s[8:15]
14914 ; GFX900-NEXT: ;;#ASMEND
14915 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14917 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_5:
14919 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14920 ; GFX90A-NEXT: ;;#ASMSTART
14921 ; GFX90A-NEXT: ; def s[12:17]
14922 ; GFX90A-NEXT: ;;#ASMEND
14923 ; GFX90A-NEXT: s_mov_b32 s8, s16
14924 ; GFX90A-NEXT: s_mov_b32 s9, s17
14925 ; GFX90A-NEXT: s_mov_b32 s10, s16
14926 ; GFX90A-NEXT: s_mov_b32 s11, s17
14927 ; GFX90A-NEXT: s_mov_b32 s14, s16
14928 ; GFX90A-NEXT: s_mov_b32 s15, s17
14929 ; GFX90A-NEXT: ;;#ASMSTART
14930 ; GFX90A-NEXT: ; use s[8:15]
14931 ; GFX90A-NEXT: ;;#ASMEND
14932 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14934 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_5:
14936 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14937 ; GFX940-NEXT: ;;#ASMSTART
14938 ; GFX940-NEXT: ; def s[0:5]
14939 ; GFX940-NEXT: ;;#ASMEND
14940 ; GFX940-NEXT: s_mov_b32 s8, s4
14941 ; GFX940-NEXT: s_mov_b32 s9, s5
14942 ; GFX940-NEXT: s_mov_b32 s10, s4
14943 ; GFX940-NEXT: s_mov_b32 s11, s5
14944 ; GFX940-NEXT: s_mov_b32 s12, s0
14945 ; GFX940-NEXT: s_mov_b32 s13, s1
14946 ; GFX940-NEXT: s_mov_b32 s14, s4
14947 ; GFX940-NEXT: s_mov_b32 s15, s5
14948 ; GFX940-NEXT: ;;#ASMSTART
14949 ; GFX940-NEXT: ; use s[8:15]
14950 ; GFX940-NEXT: ;;#ASMEND
14951 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from inline asm with an "=s" output constraint; the
; shuffled vector is consumed by inline asm whose operand is pinned to s[8:15].
14952 %vec0 = call <3 x i64> asm "; def $0", "=s"()
14953 %vec1 = call <3 x i64> asm "; def $0", "=s"()
14954 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 5>
14955 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
; Scalar-register shuffle of two <3 x i64> inline-asm results into a <4 x i64>
; using mask <5, 5, 4, 5>. Mask indices 3-5 select lanes of %vec1, so the
; result is { %vec1[2], %vec1[2], %vec1[1], %vec1[2] }. The mask never
; references %vec0, and each expected sequence below contains a single
; "; def" marker.
14959 define void @s_shuffle_v4i64_v3i64__5_5_4_5() {
14960 ; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_5:
14962 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14963 ; GFX900-NEXT: ;;#ASMSTART
14964 ; GFX900-NEXT: ; def s[12:17]
14965 ; GFX900-NEXT: ;;#ASMEND
14966 ; GFX900-NEXT: s_mov_b32 s8, s16
14967 ; GFX900-NEXT: s_mov_b32 s9, s17
14968 ; GFX900-NEXT: s_mov_b32 s10, s16
14969 ; GFX900-NEXT: s_mov_b32 s11, s17
14970 ; GFX900-NEXT: s_mov_b32 s12, s14
14971 ; GFX900-NEXT: s_mov_b32 s13, s15
14972 ; GFX900-NEXT: s_mov_b32 s14, s16
14973 ; GFX900-NEXT: s_mov_b32 s15, s17
14974 ; GFX900-NEXT: ;;#ASMSTART
14975 ; GFX900-NEXT: ; use s[8:15]
14976 ; GFX900-NEXT: ;;#ASMEND
14977 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14979 ; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_5:
14981 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14982 ; GFX90A-NEXT: ;;#ASMSTART
14983 ; GFX90A-NEXT: ; def s[12:17]
14984 ; GFX90A-NEXT: ;;#ASMEND
14985 ; GFX90A-NEXT: s_mov_b32 s8, s16
14986 ; GFX90A-NEXT: s_mov_b32 s9, s17
14987 ; GFX90A-NEXT: s_mov_b32 s10, s16
14988 ; GFX90A-NEXT: s_mov_b32 s11, s17
14989 ; GFX90A-NEXT: s_mov_b32 s12, s14
14990 ; GFX90A-NEXT: s_mov_b32 s13, s15
14991 ; GFX90A-NEXT: s_mov_b32 s14, s16
14992 ; GFX90A-NEXT: s_mov_b32 s15, s17
14993 ; GFX90A-NEXT: ;;#ASMSTART
14994 ; GFX90A-NEXT: ; use s[8:15]
14995 ; GFX90A-NEXT: ;;#ASMEND
14996 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14998 ; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_5:
15000 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15001 ; GFX940-NEXT: ;;#ASMSTART
15002 ; GFX940-NEXT: ; def s[0:5]
15003 ; GFX940-NEXT: ;;#ASMEND
15004 ; GFX940-NEXT: s_mov_b32 s8, s4
15005 ; GFX940-NEXT: s_mov_b32 s9, s5
15006 ; GFX940-NEXT: s_mov_b32 s10, s4
15007 ; GFX940-NEXT: s_mov_b32 s11, s5
15008 ; GFX940-NEXT: s_mov_b32 s12, s2
15009 ; GFX940-NEXT: s_mov_b32 s13, s3
15010 ; GFX940-NEXT: s_mov_b32 s14, s4
15011 ; GFX940-NEXT: s_mov_b32 s15, s5
15012 ; GFX940-NEXT: ;;#ASMSTART
15013 ; GFX940-NEXT: ; use s[8:15]
15014 ; GFX940-NEXT: ;;#ASMEND
15015 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both sources come from inline asm with an "=s" output constraint; the
; shuffled vector is consumed by inline asm whose operand is pinned to s[8:15].
15016 %vec0 = call <3 x i64> asm "; def $0", "=s"()
15017 %vec1 = call <3 x i64> asm "; def $0", "=s"()
15018 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 5>
15019 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
15022 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
15023 ; GFX90APLUS: {{.*}}