1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512VL
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512VLBW
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefix=AVX512VBMI2
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefix=AVX512VLVBMI2
12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
13 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2
15 ; Just one 32-bit run to make sure we do reasonable things for i32 cases.
16 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2
18 declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
24 define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
25 ; SSE2-LABEL: var_funnnel_v2i32:
27 ; SSE2-NEXT: pxor %xmm2, %xmm2
28 ; SSE2-NEXT: psubd %xmm1, %xmm2
29 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
30 ; SSE2-NEXT: pslld $23, %xmm2
31 ; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
32 ; SSE2-NEXT: cvttps2dq %xmm2, %xmm1
33 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
34 ; SSE2-NEXT: pmuludq %xmm1, %xmm0
35 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
36 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
37 ; SSE2-NEXT: pmuludq %xmm2, %xmm1
38 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
39 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
40 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
41 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
42 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
43 ; SSE2-NEXT: por %xmm3, %xmm0
46 ; SSE41-LABEL: var_funnnel_v2i32:
48 ; SSE41-NEXT: pxor %xmm2, %xmm2
49 ; SSE41-NEXT: psubd %xmm1, %xmm2
50 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
51 ; SSE41-NEXT: pslld $23, %xmm2
52 ; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
53 ; SSE41-NEXT: cvttps2dq %xmm2, %xmm1
54 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
55 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
56 ; SSE41-NEXT: pmuludq %xmm2, %xmm3
57 ; SSE41-NEXT: pmuludq %xmm1, %xmm0
58 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
59 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
60 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
61 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
62 ; SSE41-NEXT: por %xmm1, %xmm0
65 ; AVX1-LABEL: var_funnnel_v2i32:
67 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
68 ; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
69 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
70 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
71 ; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
72 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
73 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
74 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
75 ; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
76 ; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
77 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
78 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
79 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
80 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
81 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
84 ; AVX2-LABEL: var_funnnel_v2i32:
86 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
87 ; AVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1
88 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
89 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
90 ; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm2
91 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
92 ; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1
93 ; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
94 ; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
97 ; AVX512F-LABEL: var_funnnel_v2i32:
99 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
100 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
101 ; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
102 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
103 ; AVX512F-NEXT: vzeroupper
106 ; AVX512VL-LABEL: var_funnnel_v2i32:
108 ; AVX512VL-NEXT: vprorvd %xmm1, %xmm0, %xmm0
109 ; AVX512VL-NEXT: retq
111 ; AVX512BW-LABEL: var_funnnel_v2i32:
113 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
114 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
115 ; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
116 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
117 ; AVX512BW-NEXT: vzeroupper
118 ; AVX512BW-NEXT: retq
120 ; AVX512VLBW-LABEL: var_funnnel_v2i32:
121 ; AVX512VLBW: # %bb.0:
122 ; AVX512VLBW-NEXT: vprorvd %xmm1, %xmm0, %xmm0
123 ; AVX512VLBW-NEXT: retq
125 ; AVX512VBMI2-LABEL: var_funnnel_v2i32:
126 ; AVX512VBMI2: # %bb.0:
127 ; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
128 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
129 ; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
130 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
131 ; AVX512VBMI2-NEXT: vzeroupper
132 ; AVX512VBMI2-NEXT: retq
134 ; AVX512VLVBMI2-LABEL: var_funnnel_v2i32:
135 ; AVX512VLVBMI2: # %bb.0:
136 ; AVX512VLVBMI2-NEXT: vprorvd %xmm1, %xmm0, %xmm0
137 ; AVX512VLVBMI2-NEXT: retq
139 ; XOP-LABEL: var_funnnel_v2i32:
141 ; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
142 ; XOP-NEXT: vpsubd %xmm1, %xmm2, %xmm1
143 ; XOP-NEXT: vprotd %xmm1, %xmm0, %xmm0
146 ; X86-SSE2-LABEL: var_funnnel_v2i32:
148 ; X86-SSE2-NEXT: pxor %xmm2, %xmm2
149 ; X86-SSE2-NEXT: psubd %xmm1, %xmm2
150 ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
151 ; X86-SSE2-NEXT: pslld $23, %xmm2
152 ; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
153 ; X86-SSE2-NEXT: cvttps2dq %xmm2, %xmm1
154 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
155 ; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0
156 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
157 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
158 ; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1
159 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
160 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
161 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
162 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
163 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
164 ; X86-SSE2-NEXT: por %xmm3, %xmm0
165 ; X86-SSE2-NEXT: retl
166 %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %amt)
171 ; Uniform Variable Shifts
174 define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
175 ; SSE2-LABEL: splatvar_funnnel_v2i32:
177 ; SSE2-NEXT: pxor %xmm2, %xmm2
178 ; SSE2-NEXT: psubd %xmm1, %xmm2
179 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
180 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
181 ; SSE2-NEXT: pslld $23, %xmm1
182 ; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
183 ; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
184 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
185 ; SSE2-NEXT: pmuludq %xmm1, %xmm0
186 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
187 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
188 ; SSE2-NEXT: pmuludq %xmm2, %xmm1
189 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
190 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
191 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
192 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
193 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
194 ; SSE2-NEXT: por %xmm3, %xmm0
197 ; SSE41-LABEL: splatvar_funnnel_v2i32:
199 ; SSE41-NEXT: pxor %xmm2, %xmm2
200 ; SSE41-NEXT: psubd %xmm1, %xmm2
201 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
202 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
203 ; SSE41-NEXT: pslld $23, %xmm1
204 ; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
205 ; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
206 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
207 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
208 ; SSE41-NEXT: pmuludq %xmm2, %xmm3
209 ; SSE41-NEXT: pmuludq %xmm1, %xmm0
210 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
211 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
212 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
213 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
214 ; SSE41-NEXT: por %xmm1, %xmm0
217 ; AVX1-LABEL: splatvar_funnnel_v2i32:
219 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
220 ; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
221 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
222 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
223 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
224 ; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
225 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
226 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
227 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
228 ; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
229 ; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
230 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
231 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
232 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
233 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
234 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
237 ; AVX2-LABEL: splatvar_funnnel_v2i32:
239 ; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
240 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
241 ; AVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1
242 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
243 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
244 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
245 ; AVX2-NEXT: vpslld %xmm2, %xmm0, %xmm2
246 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
247 ; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1
248 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
249 ; AVX2-NEXT: vpsrld %xmm1, %xmm0, %xmm0
250 ; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
253 ; AVX512F-LABEL: splatvar_funnnel_v2i32:
255 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
256 ; AVX512F-NEXT: vpbroadcastd %xmm1, %xmm1
257 ; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
258 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
259 ; AVX512F-NEXT: vzeroupper
262 ; AVX512VL-LABEL: splatvar_funnnel_v2i32:
264 ; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
265 ; AVX512VL-NEXT: vprorvd %xmm1, %xmm0, %xmm0
266 ; AVX512VL-NEXT: retq
268 ; AVX512BW-LABEL: splatvar_funnnel_v2i32:
270 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
271 ; AVX512BW-NEXT: vpbroadcastd %xmm1, %xmm1
272 ; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
273 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
274 ; AVX512BW-NEXT: vzeroupper
275 ; AVX512BW-NEXT: retq
277 ; AVX512VLBW-LABEL: splatvar_funnnel_v2i32:
278 ; AVX512VLBW: # %bb.0:
279 ; AVX512VLBW-NEXT: vpbroadcastd %xmm1, %xmm1
280 ; AVX512VLBW-NEXT: vprorvd %xmm1, %xmm0, %xmm0
281 ; AVX512VLBW-NEXT: retq
283 ; AVX512VBMI2-LABEL: splatvar_funnnel_v2i32:
284 ; AVX512VBMI2: # %bb.0:
285 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
286 ; AVX512VBMI2-NEXT: vpbroadcastd %xmm1, %xmm1
287 ; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
288 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
289 ; AVX512VBMI2-NEXT: vzeroupper
290 ; AVX512VBMI2-NEXT: retq
292 ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v2i32:
293 ; AVX512VLVBMI2: # %bb.0:
294 ; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm1, %xmm1
295 ; AVX512VLVBMI2-NEXT: vprorvd %xmm1, %xmm0, %xmm0
296 ; AVX512VLVBMI2-NEXT: retq
298 ; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
300 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
301 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
302 ; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
303 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
306 ; XOPAVX2-LABEL: splatvar_funnnel_v2i32:
308 ; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
309 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
310 ; XOPAVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1
311 ; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0
314 ; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
316 ; X86-SSE2-NEXT: pxor %xmm2, %xmm2
317 ; X86-SSE2-NEXT: psubd %xmm1, %xmm2
318 ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
319 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
320 ; X86-SSE2-NEXT: pslld $23, %xmm1
321 ; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
322 ; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
323 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
324 ; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0
325 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
326 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
327 ; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1
328 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
329 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
330 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
331 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
332 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
333 ; X86-SSE2-NEXT: por %xmm3, %xmm0
334 ; X86-SSE2-NEXT: retl
335 %splat = shufflevector <2 x i32> %amt, <2 x i32> undef, <2 x i32> zeroinitializer
336 %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %splat)
344 define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind {
345 ; SSE2-LABEL: constant_funnnel_v2i32:
347 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [268435456,134217728,1,1]
348 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
349 ; SSE2-NEXT: pmuludq %xmm1, %xmm0
350 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
351 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
352 ; SSE2-NEXT: pmuludq %xmm2, %xmm1
353 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
354 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
355 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
356 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
357 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
358 ; SSE2-NEXT: por %xmm3, %xmm0
361 ; SSE41-LABEL: constant_funnnel_v2i32:
363 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [268435456,134217728,1,1]
364 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
365 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
366 ; SSE41-NEXT: pmuludq %xmm2, %xmm3
367 ; SSE41-NEXT: pmuludq %xmm1, %xmm0
368 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
369 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
370 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
371 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
372 ; SSE41-NEXT: por %xmm1, %xmm0
375 ; AVX1-LABEL: constant_funnnel_v2i32:
377 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [268435456,134217728,1,1]
378 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
379 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
380 ; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
381 ; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
382 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
383 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
384 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
385 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
386 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
389 ; AVX2-LABEL: constant_funnnel_v2i32:
391 ; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
392 ; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
393 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
396 ; AVX512F-LABEL: constant_funnnel_v2i32:
398 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
399 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = <4,5,u,u>
400 ; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
401 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
402 ; AVX512F-NEXT: vzeroupper
405 ; AVX512VL-LABEL: constant_funnnel_v2i32:
407 ; AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
408 ; AVX512VL-NEXT: retq
410 ; AVX512BW-LABEL: constant_funnnel_v2i32:
412 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
413 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = <4,5,u,u>
414 ; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
415 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
416 ; AVX512BW-NEXT: vzeroupper
417 ; AVX512BW-NEXT: retq
419 ; AVX512VLBW-LABEL: constant_funnnel_v2i32:
420 ; AVX512VLBW: # %bb.0:
421 ; AVX512VLBW-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
422 ; AVX512VLBW-NEXT: retq
424 ; AVX512VBMI2-LABEL: constant_funnnel_v2i32:
425 ; AVX512VBMI2: # %bb.0:
426 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
427 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = <4,5,u,u>
428 ; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
429 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
430 ; AVX512VBMI2-NEXT: vzeroupper
431 ; AVX512VBMI2-NEXT: retq
433 ; AVX512VLVBMI2-LABEL: constant_funnnel_v2i32:
434 ; AVX512VLVBMI2: # %bb.0:
435 ; AVX512VLVBMI2-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
436 ; AVX512VLVBMI2-NEXT: retq
438 ; XOP-LABEL: constant_funnnel_v2i32:
440 ; XOP-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
443 ; X86-SSE2-LABEL: constant_funnnel_v2i32:
445 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [268435456,134217728,1,1]
446 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
447 ; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0
448 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
449 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
450 ; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1
451 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
452 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
453 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
454 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
455 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
456 ; X86-SSE2-NEXT: por %xmm3, %xmm0
457 ; X86-SSE2-NEXT: retl
458 %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 5>)
463 ; Uniform Constant Shifts
466 define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind {
467 ; SSE2-LABEL: splatconstant_funnnel_v2i32:
469 ; SSE2-NEXT: movdqa %xmm0, %xmm2
470 ; SSE2-NEXT: psrld $4, %xmm2
471 ; SSE2-NEXT: movdqa %xmm0, %xmm1
472 ; SSE2-NEXT: pslld $28, %xmm1
473 ; SSE2-NEXT: por %xmm2, %xmm1
474 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
475 ; SSE2-NEXT: movaps %xmm1, %xmm0
478 ; SSE41-LABEL: splatconstant_funnnel_v2i32:
480 ; SSE41-NEXT: movdqa %xmm0, %xmm2
481 ; SSE41-NEXT: psrld $4, %xmm2
482 ; SSE41-NEXT: movdqa %xmm0, %xmm1
483 ; SSE41-NEXT: pslld $28, %xmm1
484 ; SSE41-NEXT: por %xmm2, %xmm1
485 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
486 ; SSE41-NEXT: movdqa %xmm1, %xmm0
489 ; AVX1-LABEL: splatconstant_funnnel_v2i32:
491 ; AVX1-NEXT: vpsrld $4, %xmm0, %xmm1
492 ; AVX1-NEXT: vpslld $28, %xmm0, %xmm2
493 ; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1
494 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
497 ; AVX2-LABEL: splatconstant_funnnel_v2i32:
499 ; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
500 ; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
501 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
504 ; AVX512F-LABEL: splatconstant_funnnel_v2i32:
506 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
507 ; AVX512F-NEXT: vprord $4, %zmm0, %zmm0
508 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
509 ; AVX512F-NEXT: vzeroupper
512 ; AVX512VL-LABEL: splatconstant_funnnel_v2i32:
514 ; AVX512VL-NEXT: vprord $4, %xmm0, %xmm0
515 ; AVX512VL-NEXT: retq
517 ; AVX512BW-LABEL: splatconstant_funnnel_v2i32:
519 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
520 ; AVX512BW-NEXT: vprord $4, %zmm0, %zmm0
521 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
522 ; AVX512BW-NEXT: vzeroupper
523 ; AVX512BW-NEXT: retq
525 ; AVX512VLBW-LABEL: splatconstant_funnnel_v2i32:
526 ; AVX512VLBW: # %bb.0:
527 ; AVX512VLBW-NEXT: vprord $4, %xmm0, %xmm0
528 ; AVX512VLBW-NEXT: retq
530 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i32:
531 ; AVX512VBMI2: # %bb.0:
532 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
533 ; AVX512VBMI2-NEXT: vprord $4, %zmm0, %zmm0
534 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
535 ; AVX512VBMI2-NEXT: vzeroupper
536 ; AVX512VBMI2-NEXT: retq
538 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v2i32:
539 ; AVX512VLVBMI2: # %bb.0:
540 ; AVX512VLVBMI2-NEXT: vprord $4, %xmm0, %xmm0
541 ; AVX512VLVBMI2-NEXT: retq
543 ; XOP-LABEL: splatconstant_funnnel_v2i32:
545 ; XOP-NEXT: vprotd $28, %xmm0, %xmm0
548 ; X86-SSE2-LABEL: splatconstant_funnnel_v2i32:
550 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
551 ; X86-SSE2-NEXT: psrld $4, %xmm2
552 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
553 ; X86-SSE2-NEXT: pslld $28, %xmm1
554 ; X86-SSE2-NEXT: por %xmm2, %xmm1
555 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
556 ; X86-SSE2-NEXT: movaps %xmm1, %xmm0
557 ; X86-SSE2-NEXT: retl
558 %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 4>)