1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512VL
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512VLBW
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefix=AVX512VBMI2
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefix=AVX512VLVBMI2
12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
13 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2
15 ; Just one 32-bit run to make sure we do reasonable things for i64 cases.
16 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2
18 declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
; Fully-variable per-lane amounts: fshr with %x in both data operands is a
; rotate-right of %x by %amt. Targets with a native variable rotate collapse to
; one instruction (AVX512 vprorvd, XOP vprotd via negated amount); AVX2 uses a
; srl/sll/or shift pair; SSE2/SSE41/AVX1 expand the variable shift through the
; pslld $23 + paddd + cvttps2dq pow2 trick feeding pmuludq.
24 define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
25 ; SSE2-LABEL: var_funnnel_v2i32:
27 ; SSE2-NEXT: pxor %xmm2, %xmm2
28 ; SSE2-NEXT: psubd %xmm1, %xmm2
29 ; SSE2-NEXT: pslld $23, %xmm2
30 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
31 ; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
32 ; SSE2-NEXT: cvttps2dq %xmm2, %xmm1
33 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
34 ; SSE2-NEXT: pmuludq %xmm1, %xmm0
35 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
36 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
37 ; SSE2-NEXT: pmuludq %xmm2, %xmm1
38 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
39 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
40 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
41 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
42 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
43 ; SSE2-NEXT: por %xmm3, %xmm0
46 ; SSE41-LABEL: var_funnnel_v2i32:
48 ; SSE41-NEXT: pxor %xmm2, %xmm2
49 ; SSE41-NEXT: psubd %xmm1, %xmm2
50 ; SSE41-NEXT: pslld $23, %xmm2
51 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
52 ; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
53 ; SSE41-NEXT: cvttps2dq %xmm2, %xmm1
54 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
55 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
56 ; SSE41-NEXT: pmuludq %xmm2, %xmm3
57 ; SSE41-NEXT: pmuludq %xmm1, %xmm0
58 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
59 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
60 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
61 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
62 ; SSE41-NEXT: por %xmm1, %xmm0
65 ; AVX1-LABEL: var_funnnel_v2i32:
67 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
68 ; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
69 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
70 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
71 ; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
72 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
73 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
74 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
75 ; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
76 ; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
77 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
78 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
79 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
80 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
81 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
84 ; AVX2-LABEL: var_funnnel_v2i32:
86 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
87 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
88 ; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2
89 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
90 ; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1
91 ; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
92 ; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
95 ; AVX512F-LABEL: var_funnnel_v2i32:
97 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
98 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
99 ; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
100 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
101 ; AVX512F-NEXT: vzeroupper
104 ; AVX512VL-LABEL: var_funnnel_v2i32:
106 ; AVX512VL-NEXT: vprorvd %xmm1, %xmm0, %xmm0
107 ; AVX512VL-NEXT: retq
109 ; AVX512BW-LABEL: var_funnnel_v2i32:
111 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
112 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
113 ; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
114 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
115 ; AVX512BW-NEXT: vzeroupper
116 ; AVX512BW-NEXT: retq
118 ; AVX512VLBW-LABEL: var_funnnel_v2i32:
119 ; AVX512VLBW: # %bb.0:
120 ; AVX512VLBW-NEXT: vprorvd %xmm1, %xmm0, %xmm0
121 ; AVX512VLBW-NEXT: retq
123 ; AVX512VBMI2-LABEL: var_funnnel_v2i32:
124 ; AVX512VBMI2: # %bb.0:
125 ; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
126 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
127 ; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
128 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
129 ; AVX512VBMI2-NEXT: vzeroupper
130 ; AVX512VBMI2-NEXT: retq
132 ; AVX512VLVBMI2-LABEL: var_funnnel_v2i32:
133 ; AVX512VLVBMI2: # %bb.0:
134 ; AVX512VLVBMI2-NEXT: vprorvd %xmm1, %xmm0, %xmm0
135 ; AVX512VLVBMI2-NEXT: retq
137 ; XOP-LABEL: var_funnnel_v2i32:
139 ; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
140 ; XOP-NEXT: vpsubd %xmm1, %xmm2, %xmm1
141 ; XOP-NEXT: vprotd %xmm1, %xmm0, %xmm0
144 ; X86-SSE2-LABEL: var_funnnel_v2i32:
146 ; X86-SSE2-NEXT: pxor %xmm2, %xmm2
147 ; X86-SSE2-NEXT: psubd %xmm1, %xmm2
148 ; X86-SSE2-NEXT: pslld $23, %xmm2
149 ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
150 ; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
151 ; X86-SSE2-NEXT: cvttps2dq %xmm2, %xmm1
152 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
153 ; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0
154 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
155 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
156 ; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1
157 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
158 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
159 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
160 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
161 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
162 ; X86-SSE2-NEXT: por %xmm3, %xmm0
163 ; X86-SSE2-NEXT: retl
; fshr(%x, %x, %amt) == rotate-right(%x, %amt) per the LangRef funnel-shift
; semantics (both data operands identical).
164 %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %amt)
169 ; Uniform Variable Shifts
; Uniform-but-variable amount: lane 0 of %amt is splatted by the IR
; shufflevector, so the whole vector rotates by the same runtime amount.
; AVX512 broadcasts the amount then uses vprorvd; XOP negates and uses vprotd;
; SSE/AVX mask the amount (pand) and rotate via 64-bit right shifts (psrlq) of
; the even/odd 32-bit halves, recombined with shufps.
172 define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
173 ; SSE2-LABEL: splatvar_funnnel_v2i32:
175 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
176 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
177 ; SSE2-NEXT: psrlq %xmm1, %xmm2
178 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
179 ; SSE2-NEXT: psrlq %xmm1, %xmm0
180 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
183 ; SSE41-LABEL: splatvar_funnnel_v2i32:
185 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
186 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
187 ; SSE41-NEXT: psrlq %xmm1, %xmm2
188 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
189 ; SSE41-NEXT: psrlq %xmm1, %xmm0
190 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
193 ; AVX1-LABEL: splatvar_funnnel_v2i32:
195 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
196 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
197 ; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
198 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
199 ; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
200 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
203 ; AVX2-LABEL: splatvar_funnnel_v2i32:
205 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
206 ; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
207 ; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
208 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
209 ; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
210 ; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
213 ; AVX512F-LABEL: splatvar_funnnel_v2i32:
215 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
216 ; AVX512F-NEXT: vpbroadcastd %xmm1, %xmm1
217 ; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
218 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
219 ; AVX512F-NEXT: vzeroupper
222 ; AVX512VL-LABEL: splatvar_funnnel_v2i32:
224 ; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
225 ; AVX512VL-NEXT: vprorvd %xmm1, %xmm0, %xmm0
226 ; AVX512VL-NEXT: retq
228 ; AVX512BW-LABEL: splatvar_funnnel_v2i32:
230 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
231 ; AVX512BW-NEXT: vpbroadcastd %xmm1, %xmm1
232 ; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
233 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
234 ; AVX512BW-NEXT: vzeroupper
235 ; AVX512BW-NEXT: retq
237 ; AVX512VLBW-LABEL: splatvar_funnnel_v2i32:
238 ; AVX512VLBW: # %bb.0:
239 ; AVX512VLBW-NEXT: vpbroadcastd %xmm1, %xmm1
240 ; AVX512VLBW-NEXT: vprorvd %xmm1, %xmm0, %xmm0
241 ; AVX512VLBW-NEXT: retq
243 ; AVX512VBMI2-LABEL: splatvar_funnnel_v2i32:
244 ; AVX512VBMI2: # %bb.0:
245 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
246 ; AVX512VBMI2-NEXT: vpbroadcastd %xmm1, %xmm1
247 ; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
248 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
249 ; AVX512VBMI2-NEXT: vzeroupper
250 ; AVX512VBMI2-NEXT: retq
252 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v2i32:
253 ; AVX512VLVBMI2: # %bb.0:
254 ; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm1, %xmm1
255 ; AVX512VLVBMI2-NEXT: vprorvd %xmm1, %xmm0, %xmm0
256 ; AVX512VLVBMI2-NEXT: retq
258 ; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
260 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
261 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
262 ; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
263 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
266 ; XOPAVX2-LABEL: splatvar_funnnel_v2i32:
268 ; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
269 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
270 ; XOPAVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1
271 ; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0
274 ; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
276 ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
277 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
278 ; X86-SSE2-NEXT: psrlq %xmm1, %xmm2
279 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
280 ; X86-SSE2-NEXT: psrlq %xmm1, %xmm0
281 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
282 ; X86-SSE2-NEXT: retl
; Splat lane 0 of %amt, then rotate-right every lane of %x by that amount.
283 %splat = shufflevector <2 x i32> %amt, <2 x i32> undef, <2 x i32> zeroinitializer
284 %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %splat)
; Non-uniform constant amounts <4,5>: AVX512 loads the constant vector and uses
; vprorvd, XOP folds the constants straight into vprotd, AVX2 uses constant
; vpsrlvd/vpsllvd pairs, and SSE/AVX1 rotate via pmuludq against a
; constant-pool multiplier (multiply-by-power-of-two) plus shuffles and por.
292 define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind {
293 ; SSE2-LABEL: constant_funnnel_v2i32:
295 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
296 ; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
297 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
298 ; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
299 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
300 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
301 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
302 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
303 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
304 ; SSE2-NEXT: por %xmm2, %xmm0
307 ; SSE41-LABEL: constant_funnnel_v2i32:
309 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
310 ; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
311 ; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
312 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
313 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
314 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
315 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
316 ; SSE41-NEXT: por %xmm2, %xmm0
319 ; AVX1-LABEL: constant_funnnel_v2i32:
321 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
322 ; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
323 ; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
324 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
325 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
326 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
327 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
328 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
331 ; AVX2-LABEL: constant_funnnel_v2i32:
333 ; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
334 ; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
335 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
338 ; AVX512F-LABEL: constant_funnnel_v2i32:
340 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
341 ; AVX512F-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,5,0,0]
342 ; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
343 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
344 ; AVX512F-NEXT: vzeroupper
347 ; AVX512VL-LABEL: constant_funnnel_v2i32:
349 ; AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
350 ; AVX512VL-NEXT: retq
352 ; AVX512BW-LABEL: constant_funnnel_v2i32:
354 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
355 ; AVX512BW-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,5,0,0]
356 ; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
357 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
358 ; AVX512BW-NEXT: vzeroupper
359 ; AVX512BW-NEXT: retq
361 ; AVX512VLBW-LABEL: constant_funnnel_v2i32:
362 ; AVX512VLBW: # %bb.0:
363 ; AVX512VLBW-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
364 ; AVX512VLBW-NEXT: retq
366 ; AVX512VBMI2-LABEL: constant_funnnel_v2i32:
367 ; AVX512VBMI2: # %bb.0:
368 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
369 ; AVX512VBMI2-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,5,0,0]
370 ; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
371 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
372 ; AVX512VBMI2-NEXT: vzeroupper
373 ; AVX512VBMI2-NEXT: retq
375 ; AVX512VLVBMI2-LABEL: constant_funnnel_v2i32:
376 ; AVX512VLVBMI2: # %bb.0:
377 ; AVX512VLVBMI2-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
378 ; AVX512VLVBMI2-NEXT: retq
380 ; XOP-LABEL: constant_funnnel_v2i32:
382 ; XOP-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
385 ; X86-SSE2-LABEL: constant_funnnel_v2i32:
387 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
388 ; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
389 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
390 ; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
391 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
392 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
393 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
394 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
395 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
396 ; X86-SSE2-NEXT: por %xmm2, %xmm0
397 ; X86-SSE2-NEXT: retl
; Rotate-right lane 0 by 4 and lane 1 by 5.
398 %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 5>)
403 ; Uniform Constant Shifts
; Uniform constant amount (4): collapses to an immediate-form rotate where
; available — vprord $4 on AVX512, vprotd $28 on XOP (left-rotate by 28 ==
; right-rotate by 4) — otherwise the psrld $4 / pslld $28 / por shift pair.
406 define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind {
407 ; SSE2-LABEL: splatconstant_funnnel_v2i32:
409 ; SSE2-NEXT: movdqa %xmm0, %xmm1
410 ; SSE2-NEXT: psrld $4, %xmm1
411 ; SSE2-NEXT: pslld $28, %xmm0
412 ; SSE2-NEXT: por %xmm1, %xmm0
415 ; SSE41-LABEL: splatconstant_funnnel_v2i32:
417 ; SSE41-NEXT: movdqa %xmm0, %xmm1
418 ; SSE41-NEXT: psrld $4, %xmm1
419 ; SSE41-NEXT: pslld $28, %xmm0
420 ; SSE41-NEXT: por %xmm1, %xmm0
423 ; AVX1-LABEL: splatconstant_funnnel_v2i32:
425 ; AVX1-NEXT: vpsrld $4, %xmm0, %xmm1
426 ; AVX1-NEXT: vpslld $28, %xmm0, %xmm0
427 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
430 ; AVX2-LABEL: splatconstant_funnnel_v2i32:
432 ; AVX2-NEXT: vpsrld $4, %xmm0, %xmm1
433 ; AVX2-NEXT: vpslld $28, %xmm0, %xmm0
434 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
437 ; AVX512F-LABEL: splatconstant_funnnel_v2i32:
439 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
440 ; AVX512F-NEXT: vprord $4, %zmm0, %zmm0
441 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
442 ; AVX512F-NEXT: vzeroupper
445 ; AVX512VL-LABEL: splatconstant_funnnel_v2i32:
447 ; AVX512VL-NEXT: vprord $4, %xmm0, %xmm0
448 ; AVX512VL-NEXT: retq
450 ; AVX512BW-LABEL: splatconstant_funnnel_v2i32:
452 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
453 ; AVX512BW-NEXT: vprord $4, %zmm0, %zmm0
454 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
455 ; AVX512BW-NEXT: vzeroupper
456 ; AVX512BW-NEXT: retq
458 ; AVX512VLBW-LABEL: splatconstant_funnnel_v2i32:
459 ; AVX512VLBW: # %bb.0:
460 ; AVX512VLBW-NEXT: vprord $4, %xmm0, %xmm0
461 ; AVX512VLBW-NEXT: retq
463 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i32:
464 ; AVX512VBMI2: # %bb.0:
465 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
466 ; AVX512VBMI2-NEXT: vprord $4, %zmm0, %zmm0
467 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
468 ; AVX512VBMI2-NEXT: vzeroupper
469 ; AVX512VBMI2-NEXT: retq
471 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v2i32:
472 ; AVX512VLVBMI2: # %bb.0:
473 ; AVX512VLVBMI2-NEXT: vprord $4, %xmm0, %xmm0
474 ; AVX512VLVBMI2-NEXT: retq
476 ; XOP-LABEL: splatconstant_funnnel_v2i32:
478 ; XOP-NEXT: vprotd $28, %xmm0, %xmm0
481 ; X86-SSE2-LABEL: splatconstant_funnnel_v2i32:
483 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
484 ; X86-SSE2-NEXT: psrld $4, %xmm1
485 ; X86-SSE2-NEXT: pslld $28, %xmm0
486 ; X86-SSE2-NEXT: por %xmm1, %xmm0
487 ; X86-SSE2-NEXT: retl
; Rotate-right every lane by the uniform constant 4.
488 %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 4>)