1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2,X64,X64-SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2,X64,X64-AVX2
4 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2,X86,X86-SSE2
5 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2,X86,X86-AVX2
7 ;------------------------------ 32-bit shuffles -------------------------------;
; Shift each i16 lane left by 15 (pslli.w), reinterpret the result as
; <4 x i32>, then reverse the four i32 lanes (mask <3,2,1,0>).
; The expected assembly performs the lane reversal (pshufd) first and the
; word shift second, i.e. the opposite of the order written in the IR.
9 define <4 x i32> @shuffle_i32_of_shl_i16(<8 x i16> %x) nounwind {
10 ; SSE2-LABEL: shuffle_i32_of_shl_i16:
12 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
13 ; SSE2-NEXT: psllw $15, %xmm0
14 ; SSE2-NEXT: ret{{[l|q]}}
16 ; AVX2-LABEL: shuffle_i32_of_shl_i16:
18 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
19 ; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
20 ; AVX2-NEXT: ret{{[l|q]}}
21 %i1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %x, i32 15)
22 %i2 = bitcast <8 x i16> %i1 to <4 x i32>
23 %i3 = shufflevector <4 x i32> %i2, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Logical-shift each i16 lane right by 15 (psrli.w), reinterpret the result
; as <4 x i32>, then reverse the four i32 lanes (mask <3,2,1,0>).
; The expected assembly performs the lane reversal (pshufd) before the word
; shift, i.e. the opposite of the order written in the IR.
26 define <4 x i32> @shuffle_i32_of_lshr_i16(<8 x i16> %x) nounwind {
27 ; SSE2-LABEL: shuffle_i32_of_lshr_i16:
29 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
30 ; SSE2-NEXT: psrlw $15, %xmm0
31 ; SSE2-NEXT: ret{{[l|q]}}
33 ; AVX2-LABEL: shuffle_i32_of_lshr_i16:
35 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
36 ; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0
37 ; AVX2-NEXT: ret{{[l|q]}}
38 %i1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %x, i32 15)
39 %i2 = bitcast <8 x i16> %i1 to <4 x i32>
40 %i3 = shufflevector <4 x i32> %i2, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Arithmetic-shift each i16 lane right by 15 (psrai.w), reinterpret the
; result as <4 x i32>, then reverse the four i32 lanes (mask <3,2,1,0>).
; The expected assembly performs the lane reversal (pshufd) before the word
; shift, i.e. the opposite of the order written in the IR.
43 define <4 x i32> @shuffle_i32_of_ashr_i16(<8 x i16> %x) nounwind {
44 ; SSE2-LABEL: shuffle_i32_of_ashr_i16:
46 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
47 ; SSE2-NEXT: psraw $15, %xmm0
48 ; SSE2-NEXT: ret{{[l|q]}}
50 ; AVX2-LABEL: shuffle_i32_of_ashr_i16:
52 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
53 ; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
54 ; AVX2-NEXT: ret{{[l|q]}}
55 %i1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %x, i32 15)
56 %i2 = bitcast <8 x i16> %i1 to <4 x i32>
57 %i3 = shufflevector <4 x i32> %i2, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Shift each i32 lane left by 31 (pslli.d), then reverse the four i32 lanes
; (mask <3,2,1,0>). No bitcast is needed: shift and shuffle share <4 x i32>.
; The expected assembly performs the lane reversal (pshufd) before the
; dword shift, i.e. the opposite of the order written in the IR.
61 define <4 x i32> @shuffle_i32_of_shl_i32(<4 x i32> %x) nounwind {
62 ; SSE2-LABEL: shuffle_i32_of_shl_i32:
64 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
65 ; SSE2-NEXT: pslld $31, %xmm0
66 ; SSE2-NEXT: ret{{[l|q]}}
68 ; AVX2-LABEL: shuffle_i32_of_shl_i32:
70 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
71 ; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
72 ; AVX2-NEXT: ret{{[l|q]}}
73 %i1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %x, i32 31)
74 %i2 = shufflevector <4 x i32> %i1, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Logical-shift each i32 lane right by 31 (psrli.d), then reverse the four
; i32 lanes (mask <3,2,1,0>). Shift and shuffle share the type <4 x i32>.
; The expected assembly performs the lane reversal (pshufd) before the
; dword shift, i.e. the opposite of the order written in the IR.
77 define <4 x i32> @shuffle_i32_of_lshr_i32(<4 x i32> %x) nounwind {
78 ; SSE2-LABEL: shuffle_i32_of_lshr_i32:
80 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
81 ; SSE2-NEXT: psrld $31, %xmm0
82 ; SSE2-NEXT: ret{{[l|q]}}
84 ; AVX2-LABEL: shuffle_i32_of_lshr_i32:
86 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
87 ; AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
88 ; AVX2-NEXT: ret{{[l|q]}}
89 %i1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %x, i32 31)
90 %i2 = shufflevector <4 x i32> %i1, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Arithmetic-shift each i32 lane right by 31 (psrai.d), then reverse the
; four i32 lanes (mask <3,2,1,0>). Shift and shuffle share <4 x i32>.
; The expected assembly performs the lane reversal (pshufd) before the
; dword shift, i.e. the opposite of the order written in the IR.
93 define <4 x i32> @shuffle_i32_of_ashr_i32(<4 x i32> %x) nounwind {
94 ; SSE2-LABEL: shuffle_i32_of_ashr_i32:
96 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
97 ; SSE2-NEXT: psrad $31, %xmm0
98 ; SSE2-NEXT: ret{{[l|q]}}
100 ; AVX2-LABEL: shuffle_i32_of_ashr_i32:
102 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
103 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
104 ; AVX2-NEXT: ret{{[l|q]}}
105 %i1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %x, i32 31)
106 %i2 = shufflevector <4 x i32> %i1, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Shift each i64 lane left by 63 (pslli.q), reinterpret the result as
; <4 x i32>, then reverse the four i32 lanes (mask <3,2,1,0>).
; Here the expected assembly keeps the IR order: quadword shift first,
; pshufd second.
; NOTE(review): presumably the shuffle is not moved ahead of the shift
; because reversing i32 lanes splits the 64-bit shift elements - confirm.
110 define <4 x i32> @shuffle_i32_of_shl_i64(<2 x i64> %x) nounwind {
111 ; SSE2-LABEL: shuffle_i32_of_shl_i64:
113 ; SSE2-NEXT: psllq $63, %xmm0
114 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
115 ; SSE2-NEXT: ret{{[l|q]}}
117 ; AVX2-LABEL: shuffle_i32_of_shl_i64:
119 ; AVX2-NEXT: vpsllq $63, %xmm0, %xmm0
120 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
121 ; AVX2-NEXT: ret{{[l|q]}}
122 %i1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %x, i32 63)
123 %i2 = bitcast <2 x i64> %i1 to <4 x i32>
124 %i3 = shufflevector <4 x i32> %i2, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Logical-shift each i64 lane right by 63 (psrli.q), reinterpret the result
; as <4 x i32>, then reverse the four i32 lanes (mask <3,2,1,0>).
; Here the expected assembly keeps the IR order: quadword shift first,
; pshufd second.
; NOTE(review): presumably the shuffle is not moved ahead of the shift
; because reversing i32 lanes splits the 64-bit shift elements - confirm.
127 define <4 x i32> @shuffle_i32_of_lshr_i64(<2 x i64> %x) nounwind {
128 ; SSE2-LABEL: shuffle_i32_of_lshr_i64:
130 ; SSE2-NEXT: psrlq $63, %xmm0
131 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
132 ; SSE2-NEXT: ret{{[l|q]}}
134 ; AVX2-LABEL: shuffle_i32_of_lshr_i64:
136 ; AVX2-NEXT: vpsrlq $63, %xmm0, %xmm0
137 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
138 ; AVX2-NEXT: ret{{[l|q]}}
139 %i1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %x, i32 63)
140 %i2 = bitcast <2 x i64> %i1 to <4 x i32>
141 %i3 = shufflevector <4 x i32> %i2, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Calls @llvm.x86.sse2.psrai.q with a shift amount of 63, reinterprets the
; result as <4 x i32>, then reverses the four i32 lanes (mask <3,2,1,0>).
; The declaration of this name at the end of the file is annotated
; "does not exist"; accordingly the expected assembly is an ordinary call to
; the symbol llvm.x86.sse2.psrai.q@PLT (shift amount passed in %edi on x86-64,
; pushed on the stack on i686) followed by the lane reversal.
; The four RUN configurations get separate check blocks because the calling
; sequence differs by target and the shuffle is pshufd for SSE2 but vshufps
; for AVX2.
144 define <4 x i32> @shuffle_i32_of_ashr_i64(<2 x i64> %x) nounwind {
145 ; X64-SSE2-LABEL: shuffle_i32_of_ashr_i64:
147 ; X64-SSE2-NEXT: pushq %rax
148 ; X64-SSE2-NEXT: movl $63, %edi
149 ; X64-SSE2-NEXT: callq llvm.x86.sse2.psrai.q@PLT
150 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
151 ; X64-SSE2-NEXT: popq %rax
152 ; X64-SSE2-NEXT: retq
154 ; X64-AVX2-LABEL: shuffle_i32_of_ashr_i64:
156 ; X64-AVX2-NEXT: pushq %rax
157 ; X64-AVX2-NEXT: movl $63, %edi
158 ; X64-AVX2-NEXT: callq llvm.x86.sse2.psrai.q@PLT
159 ; X64-AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
160 ; X64-AVX2-NEXT: popq %rax
161 ; X64-AVX2-NEXT: retq
163 ; X86-SSE2-LABEL: shuffle_i32_of_ashr_i64:
165 ; X86-SSE2-NEXT: pushl $63
166 ; X86-SSE2-NEXT: calll llvm.x86.sse2.psrai.q@PLT
167 ; X86-SSE2-NEXT: addl $4, %esp
168 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
169 ; X86-SSE2-NEXT: retl
171 ; X86-AVX2-LABEL: shuffle_i32_of_ashr_i64:
173 ; X86-AVX2-NEXT: pushl $63
174 ; X86-AVX2-NEXT: calll llvm.x86.sse2.psrai.q@PLT
175 ; X86-AVX2-NEXT: addl $4, %esp
176 ; X86-AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
177 ; X86-AVX2-NEXT: retl
178 %i1 = tail call <2 x i64> @llvm.x86.sse2.psrai.q(<2 x i64> %x, i32 63)
179 %i2 = bitcast <2 x i64> %i1 to <4 x i32>
180 %i3 = shufflevector <4 x i32> %i2, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
184 ;------------------------------ 64-bit shuffles -------------------------------;
; Shift each i16 lane left by 15 (pslli.w), reinterpret the result as
; <2 x i64>, then swap the two i64 lanes (mask <1,0>).
; In the expected assembly the i64 swap appears as the 32-bit permutation
; pshufd [2,3,0,1], and it is performed before the word shift, i.e. the
; opposite of the order written in the IR.
186 define <2 x i64> @shuffle_i64_of_shl_i16(<8 x i16> %x) nounwind {
187 ; SSE2-LABEL: shuffle_i64_of_shl_i16:
189 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
190 ; SSE2-NEXT: psllw $15, %xmm0
191 ; SSE2-NEXT: ret{{[l|q]}}
193 ; AVX2-LABEL: shuffle_i64_of_shl_i16:
195 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
196 ; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
197 ; AVX2-NEXT: ret{{[l|q]}}
198 %i1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %x, i32 15)
199 %i2 = bitcast <8 x i16> %i1 to <2 x i64>
200 %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; Logical-shift each i16 lane right by 15 (psrli.w), reinterpret the result
; as <2 x i64>, then swap the two i64 lanes (mask <1,0>).
; In the expected assembly the i64 swap appears as the 32-bit permutation
; pshufd [2,3,0,1], and it is performed before the word shift, i.e. the
; opposite of the order written in the IR.
203 define <2 x i64> @shuffle_i64_of_lshr_i16(<8 x i16> %x) nounwind {
204 ; SSE2-LABEL: shuffle_i64_of_lshr_i16:
206 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
207 ; SSE2-NEXT: psrlw $15, %xmm0
208 ; SSE2-NEXT: ret{{[l|q]}}
210 ; AVX2-LABEL: shuffle_i64_of_lshr_i16:
212 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
213 ; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0
214 ; AVX2-NEXT: ret{{[l|q]}}
215 %i1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %x, i32 15)
216 %i2 = bitcast <8 x i16> %i1 to <2 x i64>
217 %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; Arithmetic-shift each i16 lane right by 15 (psrai.w), reinterpret the
; result as <2 x i64>, then swap the two i64 lanes (mask <1,0>).
; In the expected assembly the i64 swap appears as the 32-bit permutation
; pshufd [2,3,0,1], and it is performed before the word shift, i.e. the
; opposite of the order written in the IR.
220 define <2 x i64> @shuffle_i64_of_ashr_i16(<8 x i16> %x) nounwind {
221 ; SSE2-LABEL: shuffle_i64_of_ashr_i16:
223 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
224 ; SSE2-NEXT: psraw $15, %xmm0
225 ; SSE2-NEXT: ret{{[l|q]}}
227 ; AVX2-LABEL: shuffle_i64_of_ashr_i16:
229 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
230 ; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
231 ; AVX2-NEXT: ret{{[l|q]}}
232 %i1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %x, i32 15)
233 %i2 = bitcast <8 x i16> %i1 to <2 x i64>
234 %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; Shift each i32 lane left by 31 (pslli.d), reinterpret the result as
; <2 x i64>, then swap the two i64 lanes (mask <1,0>).
; In the expected assembly the i64 swap appears as the 32-bit permutation
; pshufd [2,3,0,1], and it is performed before the dword shift, i.e. the
; opposite of the order written in the IR.
238 define <2 x i64> @shuffle_i64_of_shl_i32(<4 x i32> %x) nounwind {
239 ; SSE2-LABEL: shuffle_i64_of_shl_i32:
241 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
242 ; SSE2-NEXT: pslld $31, %xmm0
243 ; SSE2-NEXT: ret{{[l|q]}}
245 ; AVX2-LABEL: shuffle_i64_of_shl_i32:
247 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
248 ; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
249 ; AVX2-NEXT: ret{{[l|q]}}
250 %i1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %x, i32 31)
251 %i2 = bitcast <4 x i32> %i1 to <2 x i64>
252 %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; Logical-shift each i32 lane right by 31 (psrli.d), reinterpret the result
; as <2 x i64>, then swap the two i64 lanes (mask <1,0>).
; In the expected assembly the i64 swap appears as the 32-bit permutation
; pshufd [2,3,0,1], and it is performed before the dword shift, i.e. the
; opposite of the order written in the IR.
255 define <2 x i64> @shuffle_i64_of_lshr_i32(<4 x i32> %x) nounwind {
256 ; SSE2-LABEL: shuffle_i64_of_lshr_i32:
258 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
259 ; SSE2-NEXT: psrld $31, %xmm0
260 ; SSE2-NEXT: ret{{[l|q]}}
262 ; AVX2-LABEL: shuffle_i64_of_lshr_i32:
264 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
265 ; AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
266 ; AVX2-NEXT: ret{{[l|q]}}
267 %i1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %x, i32 31)
268 %i2 = bitcast <4 x i32> %i1 to <2 x i64>
269 %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; Arithmetic-shift each i32 lane right by 31 (psrai.d), reinterpret the
; result as <2 x i64>, then swap the two i64 lanes (mask <1,0>).
; In the expected assembly the i64 swap appears as the 32-bit permutation
; pshufd [2,3,0,1], and it is performed before the dword shift, i.e. the
; opposite of the order written in the IR.
272 define <2 x i64> @shuffle_i64_of_ashr_i32(<4 x i32> %x) nounwind {
273 ; SSE2-LABEL: shuffle_i64_of_ashr_i32:
275 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
276 ; SSE2-NEXT: psrad $31, %xmm0
277 ; SSE2-NEXT: ret{{[l|q]}}
279 ; AVX2-LABEL: shuffle_i64_of_ashr_i32:
281 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
282 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
283 ; AVX2-NEXT: ret{{[l|q]}}
284 %i1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %x, i32 31)
285 %i2 = bitcast <4 x i32> %i1 to <2 x i64>
286 %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; Shift each i64 lane left by 63 (pslli.q), then swap the two i64 lanes
; (mask <1,0>). The bitcast in the body is <2 x i64> to <2 x i64>, i.e. a
; same-type cast.
; NOTE(review): the same-type bitcast is presumably kept only so the body
; has the same three-instruction shape as the other tests - confirm.
; The expected assembly keeps the IR order: quadword shift first, then the
; swap, which appears as the 32-bit permutation pshufd [2,3,0,1].
290 define <2 x i64> @shuffle_i64_of_shl_i64(<2 x i64> %x) nounwind {
291 ; SSE2-LABEL: shuffle_i64_of_shl_i64:
293 ; SSE2-NEXT: psllq $63, %xmm0
294 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
295 ; SSE2-NEXT: ret{{[l|q]}}
297 ; AVX2-LABEL: shuffle_i64_of_shl_i64:
299 ; AVX2-NEXT: vpsllq $63, %xmm0, %xmm0
300 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
301 ; AVX2-NEXT: ret{{[l|q]}}
302 %i1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %x, i32 63)
303 %i2 = bitcast <2 x i64> %i1 to <2 x i64>
304 %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; Logical-shift each i64 lane right by 63 (psrli.q), then swap the two i64
; lanes (mask <1,0>). The bitcast in the body is <2 x i64> to <2 x i64>,
; i.e. a same-type cast.
; NOTE(review): the same-type bitcast is presumably kept only so the body
; has the same three-instruction shape as the other tests - confirm.
; The expected assembly keeps the IR order: quadword shift first, then the
; swap, which appears as the 32-bit permutation pshufd [2,3,0,1].
307 define <2 x i64> @shuffle_i64_of_lshr_i64(<2 x i64> %x) nounwind {
308 ; SSE2-LABEL: shuffle_i64_of_lshr_i64:
310 ; SSE2-NEXT: psrlq $63, %xmm0
311 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
312 ; SSE2-NEXT: ret{{[l|q]}}
314 ; AVX2-LABEL: shuffle_i64_of_lshr_i64:
316 ; AVX2-NEXT: vpsrlq $63, %xmm0, %xmm0
317 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
318 ; AVX2-NEXT: ret{{[l|q]}}
319 %i1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %x, i32 63)
320 %i2 = bitcast <2 x i64> %i1 to <2 x i64>
321 %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; Calls @llvm.x86.sse2.psrai.q with a shift amount of 63, then swaps the two
; i64 lanes (mask <1,0>); the bitcast in the body is a same-type cast.
; The declaration of this name at the end of the file is annotated
; "does not exist"; accordingly the expected assembly is an ordinary call to
; the symbol llvm.x86.sse2.psrai.q@PLT (shift amount passed in %edi on x86-64,
; pushed on the stack on i686) followed by the swap, which appears as the
; 32-bit permutation [2,3,0,1].
; The four RUN configurations get separate check blocks because the calling
; sequence differs by target and the shuffle is pshufd for SSE2 but vshufps
; for AVX2.
324 define <2 x i64> @shuffle_i64_of_ashr_i64(<2 x i64> %x) nounwind {
325 ; X64-SSE2-LABEL: shuffle_i64_of_ashr_i64:
327 ; X64-SSE2-NEXT: pushq %rax
328 ; X64-SSE2-NEXT: movl $63, %edi
329 ; X64-SSE2-NEXT: callq llvm.x86.sse2.psrai.q@PLT
330 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
331 ; X64-SSE2-NEXT: popq %rax
332 ; X64-SSE2-NEXT: retq
334 ; X64-AVX2-LABEL: shuffle_i64_of_ashr_i64:
336 ; X64-AVX2-NEXT: pushq %rax
337 ; X64-AVX2-NEXT: movl $63, %edi
338 ; X64-AVX2-NEXT: callq llvm.x86.sse2.psrai.q@PLT
339 ; X64-AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
340 ; X64-AVX2-NEXT: popq %rax
341 ; X64-AVX2-NEXT: retq
343 ; X86-SSE2-LABEL: shuffle_i64_of_ashr_i64:
345 ; X86-SSE2-NEXT: pushl $63
346 ; X86-SSE2-NEXT: calll llvm.x86.sse2.psrai.q@PLT
347 ; X86-SSE2-NEXT: addl $4, %esp
348 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
349 ; X86-SSE2-NEXT: retl
351 ; X86-AVX2-LABEL: shuffle_i64_of_ashr_i64:
353 ; X86-AVX2-NEXT: pushl $63
354 ; X86-AVX2-NEXT: calll llvm.x86.sse2.psrai.q@PLT
355 ; X86-AVX2-NEXT: addl $4, %esp
356 ; X86-AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
357 ; X86-AVX2-NEXT: retl
358 %i1 = tail call <2 x i64> @llvm.x86.sse2.psrai.q(<2 x i64> %x, i32 63)
359 %i2 = bitcast <2 x i64> %i1 to <2 x i64>
360 %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; Declarations of the shift-by-immediate callees used by the tests above:
; shl / lshr / ashr (pslli / psrli / psrai) for i16 (.w), i32 (.d) and
; i64 (.q) lanes. Each takes the vector operand and an i32 shift amount.
; The final name, psrai.q, is flagged below as not existing; the *_ashr_i64
; tests' check lines show calls to it emitted as ordinary calls through the
; PLT rather than as a shift instruction.
364 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32)
365 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32)
366 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32)
367 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32)
368 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32)
369 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32)
370 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32)
371 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32)
372 declare <2 x i64> @llvm.x86.sse2.psrai.q(<2 x i64>, i32) ; does not exist
373 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: