1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,X86-SSE
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,X64-SSE
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,X64-SSE
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512
14 define i64 @test_v2i64(<2 x i64> %a0) nounwind {
15 ; X86-SSE-LABEL: test_v2i64:
17 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
18 ; X86-SSE-NEXT: pand %xmm0, %xmm1
19 ; X86-SSE-NEXT: movd %xmm1, %eax
20 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
21 ; X86-SSE-NEXT: movd %xmm0, %edx
24 ; X64-SSE-LABEL: test_v2i64:
26 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
27 ; X64-SSE-NEXT: pand %xmm0, %xmm1
28 ; X64-SSE-NEXT: movq %xmm1, %rax
31 ; AVX-LABEL: test_v2i64:
33 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
34 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
35 ; AVX-NEXT: vmovq %xmm0, %rax
37 %1 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %a0)
41 define i64 @test_v4i64(<4 x i64> %a0) nounwind {
42 ; X86-SSE-LABEL: test_v4i64:
44 ; X86-SSE-NEXT: pand %xmm1, %xmm0
45 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
46 ; X86-SSE-NEXT: pand %xmm0, %xmm1
47 ; X86-SSE-NEXT: movd %xmm1, %eax
48 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
49 ; X86-SSE-NEXT: movd %xmm0, %edx
52 ; X64-SSE-LABEL: test_v4i64:
54 ; X64-SSE-NEXT: pand %xmm1, %xmm0
55 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
56 ; X64-SSE-NEXT: pand %xmm0, %xmm1
57 ; X64-SSE-NEXT: movq %xmm1, %rax
60 ; AVX1-LABEL: test_v4i64:
62 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
63 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
64 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
65 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
66 ; AVX1-NEXT: vmovq %xmm0, %rax
67 ; AVX1-NEXT: vzeroupper
70 ; AVX2-LABEL: test_v4i64:
72 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
73 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
74 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
75 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
76 ; AVX2-NEXT: vmovq %xmm0, %rax
77 ; AVX2-NEXT: vzeroupper
80 ; AVX512-LABEL: test_v4i64:
82 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
83 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
84 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
85 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
86 ; AVX512-NEXT: vmovq %xmm0, %rax
87 ; AVX512-NEXT: vzeroupper
89 %1 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %a0)
93 define i64 @test_v8i64(<8 x i64> %a0) nounwind {
94 ; X86-SSE-LABEL: test_v8i64:
96 ; X86-SSE-NEXT: pushl %ebp
97 ; X86-SSE-NEXT: movl %esp, %ebp
98 ; X86-SSE-NEXT: andl $-16, %esp
99 ; X86-SSE-NEXT: subl $16, %esp
100 ; X86-SSE-NEXT: pand %xmm2, %xmm0
101 ; X86-SSE-NEXT: pand 8(%ebp), %xmm1
102 ; X86-SSE-NEXT: pand %xmm0, %xmm1
103 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
104 ; X86-SSE-NEXT: pand %xmm1, %xmm0
105 ; X86-SSE-NEXT: movd %xmm0, %eax
106 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
107 ; X86-SSE-NEXT: movd %xmm0, %edx
108 ; X86-SSE-NEXT: movl %ebp, %esp
109 ; X86-SSE-NEXT: popl %ebp
112 ; X64-SSE-LABEL: test_v8i64:
114 ; X64-SSE-NEXT: pand %xmm3, %xmm1
115 ; X64-SSE-NEXT: pand %xmm2, %xmm0
116 ; X64-SSE-NEXT: pand %xmm1, %xmm0
117 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
118 ; X64-SSE-NEXT: pand %xmm0, %xmm1
119 ; X64-SSE-NEXT: movq %xmm1, %rax
122 ; AVX1-LABEL: test_v8i64:
124 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
125 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
126 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
127 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
128 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
129 ; AVX1-NEXT: vmovq %xmm0, %rax
130 ; AVX1-NEXT: vzeroupper
133 ; AVX2-LABEL: test_v8i64:
135 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
136 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
137 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
138 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
139 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
140 ; AVX2-NEXT: vmovq %xmm0, %rax
141 ; AVX2-NEXT: vzeroupper
144 ; AVX512-LABEL: test_v8i64:
146 ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
147 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
148 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
149 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
150 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
151 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
152 ; AVX512-NEXT: vmovq %xmm0, %rax
153 ; AVX512-NEXT: vzeroupper
155 %1 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %a0)
159 define i64 @test_v16i64(<16 x i64> %a0) nounwind {
160 ; X86-SSE-LABEL: test_v16i64:
162 ; X86-SSE-NEXT: pushl %ebp
163 ; X86-SSE-NEXT: movl %esp, %ebp
164 ; X86-SSE-NEXT: andl $-16, %esp
165 ; X86-SSE-NEXT: subl $16, %esp
166 ; X86-SSE-NEXT: movdqa 8(%ebp), %xmm3
167 ; X86-SSE-NEXT: pand 56(%ebp), %xmm2
168 ; X86-SSE-NEXT: pand 24(%ebp), %xmm0
169 ; X86-SSE-NEXT: pand %xmm2, %xmm0
170 ; X86-SSE-NEXT: pand 72(%ebp), %xmm3
171 ; X86-SSE-NEXT: pand 40(%ebp), %xmm1
172 ; X86-SSE-NEXT: pand %xmm3, %xmm1
173 ; X86-SSE-NEXT: pand %xmm0, %xmm1
174 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
175 ; X86-SSE-NEXT: pand %xmm1, %xmm0
176 ; X86-SSE-NEXT: movd %xmm0, %eax
177 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
178 ; X86-SSE-NEXT: movd %xmm0, %edx
179 ; X86-SSE-NEXT: movl %ebp, %esp
180 ; X86-SSE-NEXT: popl %ebp
183 ; X64-SSE-LABEL: test_v16i64:
185 ; X64-SSE-NEXT: pand %xmm6, %xmm2
186 ; X64-SSE-NEXT: pand %xmm4, %xmm0
187 ; X64-SSE-NEXT: pand %xmm2, %xmm0
188 ; X64-SSE-NEXT: pand %xmm7, %xmm3
189 ; X64-SSE-NEXT: pand %xmm5, %xmm1
190 ; X64-SSE-NEXT: pand %xmm3, %xmm1
191 ; X64-SSE-NEXT: pand %xmm0, %xmm1
192 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
193 ; X64-SSE-NEXT: pand %xmm1, %xmm0
194 ; X64-SSE-NEXT: movq %xmm0, %rax
197 ; AVX1-LABEL: test_v16i64:
199 ; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
200 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
201 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
202 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
203 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
204 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
205 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
206 ; AVX1-NEXT: vmovq %xmm0, %rax
207 ; AVX1-NEXT: vzeroupper
210 ; AVX2-LABEL: test_v16i64:
212 ; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
213 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
214 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
215 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
216 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
217 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
218 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
219 ; AVX2-NEXT: vmovq %xmm0, %rax
220 ; AVX2-NEXT: vzeroupper
223 ; AVX512-LABEL: test_v16i64:
225 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
226 ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
227 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
228 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
229 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
230 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
231 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
232 ; AVX512-NEXT: vmovq %xmm0, %rax
233 ; AVX512-NEXT: vzeroupper
235 %1 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> %a0)
243 define i32 @test_v2i32(<2 x i32> %a0) nounwind {
244 ; SSE-LABEL: test_v2i32:
246 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
247 ; SSE-NEXT: pand %xmm0, %xmm1
248 ; SSE-NEXT: movd %xmm1, %eax
249 ; SSE-NEXT: ret{{[l|q]}}
251 ; AVX-LABEL: test_v2i32:
253 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
254 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
255 ; AVX-NEXT: vmovd %xmm0, %eax
257 %1 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %a0)
261 define i32 @test_v4i32(<4 x i32> %a0) nounwind {
262 ; SSE-LABEL: test_v4i32:
264 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
265 ; SSE-NEXT: pand %xmm0, %xmm1
266 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
267 ; SSE-NEXT: pand %xmm1, %xmm0
268 ; SSE-NEXT: movd %xmm0, %eax
269 ; SSE-NEXT: ret{{[l|q]}}
271 ; AVX-LABEL: test_v4i32:
273 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
274 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
275 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
276 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
277 ; AVX-NEXT: vmovd %xmm0, %eax
279 %1 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a0)
283 define i32 @test_v8i32(<8 x i32> %a0) nounwind {
284 ; SSE-LABEL: test_v8i32:
286 ; SSE-NEXT: pand %xmm1, %xmm0
287 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
288 ; SSE-NEXT: pand %xmm0, %xmm1
289 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
290 ; SSE-NEXT: pand %xmm1, %xmm0
291 ; SSE-NEXT: movd %xmm0, %eax
292 ; SSE-NEXT: ret{{[l|q]}}
294 ; AVX1-LABEL: test_v8i32:
296 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
297 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
298 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
299 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
300 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
301 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
302 ; AVX1-NEXT: vmovd %xmm0, %eax
303 ; AVX1-NEXT: vzeroupper
306 ; AVX2-LABEL: test_v8i32:
308 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
309 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
310 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
311 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
312 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
313 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
314 ; AVX2-NEXT: vmovd %xmm0, %eax
315 ; AVX2-NEXT: vzeroupper
318 ; AVX512-LABEL: test_v8i32:
320 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
321 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
322 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
323 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
324 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
325 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
326 ; AVX512-NEXT: vmovd %xmm0, %eax
327 ; AVX512-NEXT: vzeroupper
329 %1 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %a0)
333 define i32 @test_v16i32(<16 x i32> %a0) nounwind {
334 ; X86-SSE-LABEL: test_v16i32:
336 ; X86-SSE-NEXT: pushl %ebp
337 ; X86-SSE-NEXT: movl %esp, %ebp
338 ; X86-SSE-NEXT: andl $-16, %esp
339 ; X86-SSE-NEXT: subl $16, %esp
340 ; X86-SSE-NEXT: pand %xmm2, %xmm0
341 ; X86-SSE-NEXT: pand 8(%ebp), %xmm1
342 ; X86-SSE-NEXT: pand %xmm0, %xmm1
343 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
344 ; X86-SSE-NEXT: pand %xmm1, %xmm0
345 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
346 ; X86-SSE-NEXT: pand %xmm0, %xmm1
347 ; X86-SSE-NEXT: movd %xmm1, %eax
348 ; X86-SSE-NEXT: movl %ebp, %esp
349 ; X86-SSE-NEXT: popl %ebp
352 ; X64-SSE-LABEL: test_v16i32:
354 ; X64-SSE-NEXT: pand %xmm3, %xmm1
355 ; X64-SSE-NEXT: pand %xmm2, %xmm0
356 ; X64-SSE-NEXT: pand %xmm1, %xmm0
357 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
358 ; X64-SSE-NEXT: pand %xmm0, %xmm1
359 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
360 ; X64-SSE-NEXT: pand %xmm1, %xmm0
361 ; X64-SSE-NEXT: movd %xmm0, %eax
364 ; AVX1-LABEL: test_v16i32:
366 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
367 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
368 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
369 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
370 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
371 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[1,1,1,1]
372 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
373 ; AVX1-NEXT: vmovd %xmm0, %eax
374 ; AVX1-NEXT: vzeroupper
377 ; AVX2-LABEL: test_v16i32:
379 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
380 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
381 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
382 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
383 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
384 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
385 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
386 ; AVX2-NEXT: vmovd %xmm0, %eax
387 ; AVX2-NEXT: vzeroupper
390 ; AVX512-LABEL: test_v16i32:
392 ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
393 ; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
394 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
395 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
396 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
397 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
398 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
399 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
400 ; AVX512-NEXT: vmovd %xmm0, %eax
401 ; AVX512-NEXT: vzeroupper
403 %1 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %a0)
407 define i32 @test_v32i32(<32 x i32> %a0) nounwind {
408 ; X86-SSE-LABEL: test_v32i32:
410 ; X86-SSE-NEXT: pushl %ebp
411 ; X86-SSE-NEXT: movl %esp, %ebp
412 ; X86-SSE-NEXT: andl $-16, %esp
413 ; X86-SSE-NEXT: subl $16, %esp
414 ; X86-SSE-NEXT: movdqa 8(%ebp), %xmm3
415 ; X86-SSE-NEXT: pand 56(%ebp), %xmm2
416 ; X86-SSE-NEXT: pand 24(%ebp), %xmm0
417 ; X86-SSE-NEXT: pand %xmm2, %xmm0
418 ; X86-SSE-NEXT: pand 72(%ebp), %xmm3
419 ; X86-SSE-NEXT: pand 40(%ebp), %xmm1
420 ; X86-SSE-NEXT: pand %xmm3, %xmm1
421 ; X86-SSE-NEXT: pand %xmm0, %xmm1
422 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
423 ; X86-SSE-NEXT: pand %xmm1, %xmm0
424 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
425 ; X86-SSE-NEXT: pand %xmm0, %xmm1
426 ; X86-SSE-NEXT: movd %xmm1, %eax
427 ; X86-SSE-NEXT: movl %ebp, %esp
428 ; X86-SSE-NEXT: popl %ebp
431 ; X64-SSE-LABEL: test_v32i32:
433 ; X64-SSE-NEXT: pand %xmm6, %xmm2
434 ; X64-SSE-NEXT: pand %xmm4, %xmm0
435 ; X64-SSE-NEXT: pand %xmm2, %xmm0
436 ; X64-SSE-NEXT: pand %xmm7, %xmm3
437 ; X64-SSE-NEXT: pand %xmm5, %xmm1
438 ; X64-SSE-NEXT: pand %xmm3, %xmm1
439 ; X64-SSE-NEXT: pand %xmm0, %xmm1
440 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
441 ; X64-SSE-NEXT: pand %xmm1, %xmm0
442 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
443 ; X64-SSE-NEXT: pand %xmm0, %xmm1
444 ; X64-SSE-NEXT: movd %xmm1, %eax
447 ; AVX1-LABEL: test_v32i32:
449 ; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
450 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
451 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
452 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
453 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
454 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
455 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
456 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[1,1,1,1]
457 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
458 ; AVX1-NEXT: vmovd %xmm0, %eax
459 ; AVX1-NEXT: vzeroupper
462 ; AVX2-LABEL: test_v32i32:
464 ; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
465 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
466 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
467 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
468 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
469 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
470 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
471 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
472 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
473 ; AVX2-NEXT: vmovd %xmm0, %eax
474 ; AVX2-NEXT: vzeroupper
477 ; AVX512-LABEL: test_v32i32:
479 ; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
480 ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
481 ; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
482 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
483 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
484 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
485 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
486 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
487 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
488 ; AVX512-NEXT: vmovd %xmm0, %eax
489 ; AVX512-NEXT: vzeroupper
491 %1 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> %a0)
499 define i16 @test_v2i16(<2 x i16> %a0) nounwind {
500 ; SSE-LABEL: test_v2i16:
502 ; SSE-NEXT: movdqa %xmm0, %xmm1
503 ; SSE-NEXT: psrld $16, %xmm1
504 ; SSE-NEXT: pand %xmm0, %xmm1
505 ; SSE-NEXT: movd %xmm1, %eax
506 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
507 ; SSE-NEXT: ret{{[l|q]}}
509 ; AVX-LABEL: test_v2i16:
511 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
512 ; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
513 ; AVX-NEXT: vmovd %xmm0, %eax
514 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
516 %1 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> %a0)
520 define i16 @test_v4i16(<4 x i16> %a0) nounwind {
521 ; SSE-LABEL: test_v4i16:
523 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
524 ; SSE-NEXT: pand %xmm0, %xmm1
525 ; SSE-NEXT: movdqa %xmm1, %xmm0
526 ; SSE-NEXT: psrld $16, %xmm0
527 ; SSE-NEXT: pand %xmm1, %xmm0
528 ; SSE-NEXT: movd %xmm0, %eax
529 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
530 ; SSE-NEXT: ret{{[l|q]}}
532 ; AVX-LABEL: test_v4i16:
534 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
535 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
536 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
537 ; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
538 ; AVX-NEXT: vmovd %xmm0, %eax
539 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
541 %1 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a0)
545 define i16 @test_v8i16(<8 x i16> %a0) nounwind {
546 ; SSE-LABEL: test_v8i16:
548 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
549 ; SSE-NEXT: pand %xmm0, %xmm1
550 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
551 ; SSE-NEXT: pand %xmm1, %xmm0
552 ; SSE-NEXT: movdqa %xmm0, %xmm1
553 ; SSE-NEXT: psrld $16, %xmm1
554 ; SSE-NEXT: pand %xmm0, %xmm1
555 ; SSE-NEXT: movd %xmm1, %eax
556 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
557 ; SSE-NEXT: ret{{[l|q]}}
559 ; AVX-LABEL: test_v8i16:
561 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
562 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
563 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
564 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
565 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
566 ; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
567 ; AVX-NEXT: vmovd %xmm0, %eax
568 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
570 %1 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a0)
574 define i16 @test_v16i16(<16 x i16> %a0) nounwind {
575 ; SSE-LABEL: test_v16i16:
577 ; SSE-NEXT: pand %xmm1, %xmm0
578 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
579 ; SSE-NEXT: pand %xmm0, %xmm1
580 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
581 ; SSE-NEXT: pand %xmm1, %xmm0
582 ; SSE-NEXT: movdqa %xmm0, %xmm1
583 ; SSE-NEXT: psrld $16, %xmm1
584 ; SSE-NEXT: pand %xmm0, %xmm1
585 ; SSE-NEXT: movd %xmm1, %eax
586 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
587 ; SSE-NEXT: ret{{[l|q]}}
589 ; AVX1-LABEL: test_v16i16:
591 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
592 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
593 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
594 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
595 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
596 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
597 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
598 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
599 ; AVX1-NEXT: vmovd %xmm0, %eax
600 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
601 ; AVX1-NEXT: vzeroupper
604 ; AVX2-LABEL: test_v16i16:
606 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
607 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
608 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
609 ; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
610 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
611 ; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
612 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
613 ; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
614 ; AVX2-NEXT: vmovd %xmm0, %eax
615 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
616 ; AVX2-NEXT: vzeroupper
619 ; AVX512-LABEL: test_v16i16:
621 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
622 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
623 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
624 ; AVX512-NEXT: vpand %xmm0, %xmm1, %xmm0
625 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
626 ; AVX512-NEXT: vpand %xmm0, %xmm1, %xmm0
627 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
628 ; AVX512-NEXT: vpand %xmm0, %xmm1, %xmm0
629 ; AVX512-NEXT: vmovd %xmm0, %eax
630 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
631 ; AVX512-NEXT: vzeroupper
633 %1 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %a0)
637 define i16 @test_v32i16(<32 x i16> %a0) nounwind {
638 ; X86-SSE-LABEL: test_v32i16:
640 ; X86-SSE-NEXT: pushl %ebp
641 ; X86-SSE-NEXT: movl %esp, %ebp
642 ; X86-SSE-NEXT: andl $-16, %esp
643 ; X86-SSE-NEXT: subl $16, %esp
644 ; X86-SSE-NEXT: pand %xmm2, %xmm0
645 ; X86-SSE-NEXT: pand 8(%ebp), %xmm1
646 ; X86-SSE-NEXT: pand %xmm0, %xmm1
647 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
648 ; X86-SSE-NEXT: pand %xmm1, %xmm0
649 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
650 ; X86-SSE-NEXT: pand %xmm0, %xmm1
651 ; X86-SSE-NEXT: movdqa %xmm1, %xmm0
652 ; X86-SSE-NEXT: psrld $16, %xmm0
653 ; X86-SSE-NEXT: pand %xmm1, %xmm0
654 ; X86-SSE-NEXT: movd %xmm0, %eax
655 ; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
656 ; X86-SSE-NEXT: movl %ebp, %esp
657 ; X86-SSE-NEXT: popl %ebp
660 ; X64-SSE-LABEL: test_v32i16:
662 ; X64-SSE-NEXT: pand %xmm3, %xmm1
663 ; X64-SSE-NEXT: pand %xmm2, %xmm0
664 ; X64-SSE-NEXT: pand %xmm1, %xmm0
665 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
666 ; X64-SSE-NEXT: pand %xmm0, %xmm1
667 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
668 ; X64-SSE-NEXT: pand %xmm1, %xmm0
669 ; X64-SSE-NEXT: movdqa %xmm0, %xmm1
670 ; X64-SSE-NEXT: psrld $16, %xmm1
671 ; X64-SSE-NEXT: pand %xmm0, %xmm1
672 ; X64-SSE-NEXT: movd %xmm1, %eax
673 ; X64-SSE-NEXT: # kill: def $ax killed $ax killed $eax
676 ; AVX1-LABEL: test_v32i16:
678 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
679 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
680 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
681 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
682 ; AVX1-NEXT: vandps %xmm0, %xmm1, %xmm0
683 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[1,1,1,1]
684 ; AVX1-NEXT: vandps %xmm0, %xmm1, %xmm0
685 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
686 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
687 ; AVX1-NEXT: vmovd %xmm0, %eax
688 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
689 ; AVX1-NEXT: vzeroupper
692 ; AVX2-LABEL: test_v32i16:
694 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
695 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
696 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
697 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
698 ; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
699 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
700 ; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
701 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
702 ; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
703 ; AVX2-NEXT: vmovd %xmm0, %eax
704 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
705 ; AVX2-NEXT: vzeroupper
708 ; AVX512-LABEL: test_v32i16:
710 ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
711 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
712 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
713 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
714 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
715 ; AVX512-NEXT: vpand %xmm0, %xmm1, %xmm0
716 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
717 ; AVX512-NEXT: vpand %xmm0, %xmm1, %xmm0
718 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
719 ; AVX512-NEXT: vpand %xmm0, %xmm1, %xmm0
720 ; AVX512-NEXT: vmovd %xmm0, %eax
721 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
722 ; AVX512-NEXT: vzeroupper
724 %1 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> %a0)
728 define i16 @test_v64i16(<64 x i16> %a0) nounwind {
729 ; X86-SSE-LABEL: test_v64i16:
731 ; X86-SSE-NEXT: pushl %ebp
732 ; X86-SSE-NEXT: movl %esp, %ebp
733 ; X86-SSE-NEXT: andl $-16, %esp
734 ; X86-SSE-NEXT: subl $16, %esp
735 ; X86-SSE-NEXT: movdqa 8(%ebp), %xmm3
736 ; X86-SSE-NEXT: pand 56(%ebp), %xmm2
737 ; X86-SSE-NEXT: pand 24(%ebp), %xmm0
738 ; X86-SSE-NEXT: pand %xmm2, %xmm0
739 ; X86-SSE-NEXT: pand 72(%ebp), %xmm3
740 ; X86-SSE-NEXT: pand 40(%ebp), %xmm1
741 ; X86-SSE-NEXT: pand %xmm3, %xmm1
742 ; X86-SSE-NEXT: pand %xmm0, %xmm1
743 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
744 ; X86-SSE-NEXT: pand %xmm1, %xmm0
745 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
746 ; X86-SSE-NEXT: pand %xmm0, %xmm1
747 ; X86-SSE-NEXT: movdqa %xmm1, %xmm0
748 ; X86-SSE-NEXT: psrld $16, %xmm0
749 ; X86-SSE-NEXT: pand %xmm1, %xmm0
750 ; X86-SSE-NEXT: movd %xmm0, %eax
751 ; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
752 ; X86-SSE-NEXT: movl %ebp, %esp
753 ; X86-SSE-NEXT: popl %ebp
756 ; X64-SSE-LABEL: test_v64i16:
758 ; X64-SSE-NEXT: pand %xmm6, %xmm2
759 ; X64-SSE-NEXT: pand %xmm4, %xmm0
760 ; X64-SSE-NEXT: pand %xmm2, %xmm0
761 ; X64-SSE-NEXT: pand %xmm7, %xmm3
762 ; X64-SSE-NEXT: pand %xmm5, %xmm1
763 ; X64-SSE-NEXT: pand %xmm3, %xmm1
764 ; X64-SSE-NEXT: pand %xmm0, %xmm1
765 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
766 ; X64-SSE-NEXT: pand %xmm1, %xmm0
767 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
768 ; X64-SSE-NEXT: pand %xmm0, %xmm1
769 ; X64-SSE-NEXT: movdqa %xmm1, %xmm0
770 ; X64-SSE-NEXT: psrld $16, %xmm0
771 ; X64-SSE-NEXT: pand %xmm1, %xmm0
772 ; X64-SSE-NEXT: movd %xmm0, %eax
773 ; X64-SSE-NEXT: # kill: def $ax killed $ax killed $eax
776 ; AVX1-LABEL: test_v64i16:
778 ; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
779 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
780 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
781 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
782 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
783 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
784 ; AVX1-NEXT: vandps %xmm0, %xmm1, %xmm0
785 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[1,1,1,1]
786 ; AVX1-NEXT: vandps %xmm0, %xmm1, %xmm0
787 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
788 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
789 ; AVX1-NEXT: vmovd %xmm0, %eax
790 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
791 ; AVX1-NEXT: vzeroupper
794 ; AVX2-LABEL: test_v64i16:
796 ; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
797 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
798 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
799 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
800 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
801 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
802 ; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
803 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
804 ; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
805 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
806 ; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
807 ; AVX2-NEXT: vmovd %xmm0, %eax
808 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
809 ; AVX2-NEXT: vzeroupper
812 ; AVX512-LABEL: test_v64i16:
814 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
815 ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
816 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
817 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
818 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
819 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
820 ; AVX512-NEXT: vpand %xmm0, %xmm1, %xmm0
821 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
822 ; AVX512-NEXT: vpand %xmm0, %xmm1, %xmm0
823 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
824 ; AVX512-NEXT: vpand %xmm0, %xmm1, %xmm0
825 ; AVX512-NEXT: vmovd %xmm0, %eax
826 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
827 ; AVX512-NEXT: vzeroupper
829 %1 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> %a0)
837 define i8 @test_v2i8(<2 x i8> %a0) nounwind {
838 ; SSE-LABEL: test_v2i8:
840 ; SSE-NEXT: movdqa %xmm0, %xmm1
841 ; SSE-NEXT: psrlw $8, %xmm1
842 ; SSE-NEXT: pand %xmm0, %xmm1
843 ; SSE-NEXT: movd %xmm1, %eax
844 ; SSE-NEXT: # kill: def $al killed $al killed $eax
845 ; SSE-NEXT: ret{{[l|q]}}
847 ; AVX-LABEL: test_v2i8:
849 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
850 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
851 ; AVX-NEXT: vmovd %xmm0, %eax
852 ; AVX-NEXT: # kill: def $al killed $al killed $eax
854 %1 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> %a0)
858 define i8 @test_v4i8(<4 x i8> %a0) nounwind {
859 ; SSE-LABEL: test_v4i8:
861 ; SSE-NEXT: movdqa %xmm0, %xmm1
862 ; SSE-NEXT: psrld $16, %xmm1
863 ; SSE-NEXT: pand %xmm0, %xmm1
864 ; SSE-NEXT: movdqa %xmm1, %xmm0
865 ; SSE-NEXT: psrlw $8, %xmm0
866 ; SSE-NEXT: pand %xmm1, %xmm0
867 ; SSE-NEXT: movd %xmm0, %eax
868 ; SSE-NEXT: # kill: def $al killed $al killed $eax
869 ; SSE-NEXT: ret{{[l|q]}}
871 ; AVX-LABEL: test_v4i8:
873 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
874 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
875 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
876 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
877 ; AVX-NEXT: vmovd %xmm0, %eax
878 ; AVX-NEXT: # kill: def $al killed $al killed $eax
880 %1 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %a0)
884 define i8 @test_v8i8(<8 x i8> %a0) nounwind {
885 ; SSE-LABEL: test_v8i8:
887 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
888 ; SSE-NEXT: pand %xmm0, %xmm1
889 ; SSE-NEXT: movdqa %xmm1, %xmm0
890 ; SSE-NEXT: psrld $16, %xmm0
891 ; SSE-NEXT: pand %xmm1, %xmm0
892 ; SSE-NEXT: movdqa %xmm0, %xmm1
893 ; SSE-NEXT: psrlw $8, %xmm1
894 ; SSE-NEXT: pand %xmm0, %xmm1
895 ; SSE-NEXT: movd %xmm1, %eax
896 ; SSE-NEXT: # kill: def $al killed $al killed $eax
897 ; SSE-NEXT: ret{{[l|q]}}
899 ; AVX-LABEL: test_v8i8:
901 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
902 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
903 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
904 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
905 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
906 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
907 ; AVX-NEXT: vmovd %xmm0, %eax
908 ; AVX-NEXT: # kill: def $al killed $al killed $eax
910 %1 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a0)
914 define i8 @test_v16i8(<16 x i8> %a0) nounwind {
915 ; SSE-LABEL: test_v16i8:
917 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
918 ; SSE-NEXT: pand %xmm0, %xmm1
919 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
920 ; SSE-NEXT: pand %xmm1, %xmm0
921 ; SSE-NEXT: movdqa %xmm0, %xmm1
922 ; SSE-NEXT: psrld $16, %xmm1
923 ; SSE-NEXT: pand %xmm0, %xmm1
924 ; SSE-NEXT: movdqa %xmm1, %xmm0
925 ; SSE-NEXT: psrlw $8, %xmm0
926 ; SSE-NEXT: pand %xmm1, %xmm0
927 ; SSE-NEXT: movd %xmm0, %eax
928 ; SSE-NEXT: # kill: def $al killed $al killed $eax
929 ; SSE-NEXT: ret{{[l|q]}}
931 ; AVX-LABEL: test_v16i8:
933 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
934 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
935 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
936 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
937 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
938 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
939 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
940 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
941 ; AVX-NEXT: vmovd %xmm0, %eax
942 ; AVX-NEXT: # kill: def $al killed $al killed $eax
944 %1 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a0)
948 define i8 @test_v32i8(<32 x i8> %a0) nounwind {
949 ; SSE-LABEL: test_v32i8:
951 ; SSE-NEXT: pand %xmm1, %xmm0
952 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
953 ; SSE-NEXT: pand %xmm0, %xmm1
954 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
955 ; SSE-NEXT: pand %xmm1, %xmm0
956 ; SSE-NEXT: movdqa %xmm0, %xmm1
957 ; SSE-NEXT: psrld $16, %xmm1
958 ; SSE-NEXT: pand %xmm0, %xmm1
959 ; SSE-NEXT: movdqa %xmm1, %xmm0
960 ; SSE-NEXT: psrlw $8, %xmm0
961 ; SSE-NEXT: pand %xmm1, %xmm0
962 ; SSE-NEXT: movd %xmm0, %eax
963 ; SSE-NEXT: # kill: def $al killed $al killed $eax
964 ; SSE-NEXT: ret{{[l|q]}}
966 ; AVX1-LABEL: test_v32i8:
968 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
969 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
970 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
971 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
972 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
973 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
974 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
975 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
976 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
977 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
978 ; AVX1-NEXT: vmovd %xmm0, %eax
979 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
980 ; AVX1-NEXT: vzeroupper
983 ; AVX2-LABEL: test_v32i8:
985 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
986 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
987 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
988 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
989 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
990 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
991 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
992 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
993 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
994 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
995 ; AVX2-NEXT: vmovd %xmm0, %eax
996 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
997 ; AVX2-NEXT: vzeroupper
1000 ; AVX512-LABEL: test_v32i8:
1002 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1003 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
1004 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1005 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
1006 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1007 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
1008 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
1009 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
1010 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
1011 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
1012 ; AVX512-NEXT: vmovd %xmm0, %eax
1013 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
1014 ; AVX512-NEXT: vzeroupper
1016 %1 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %a0)
1020 define i8 @test_v64i8(<64 x i8> %a0) nounwind {
1021 ; X86-SSE-LABEL: test_v64i8:
1023 ; X86-SSE-NEXT: pushl %ebp
1024 ; X86-SSE-NEXT: movl %esp, %ebp
1025 ; X86-SSE-NEXT: andl $-16, %esp
1026 ; X86-SSE-NEXT: subl $16, %esp
1027 ; X86-SSE-NEXT: pand %xmm2, %xmm0
1028 ; X86-SSE-NEXT: pand 8(%ebp), %xmm1
1029 ; X86-SSE-NEXT: pand %xmm0, %xmm1
1030 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1031 ; X86-SSE-NEXT: pand %xmm1, %xmm0
1032 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1033 ; X86-SSE-NEXT: pand %xmm0, %xmm1
1034 ; X86-SSE-NEXT: movdqa %xmm1, %xmm0
1035 ; X86-SSE-NEXT: psrld $16, %xmm0
1036 ; X86-SSE-NEXT: pand %xmm1, %xmm0
1037 ; X86-SSE-NEXT: movdqa %xmm0, %xmm1
1038 ; X86-SSE-NEXT: psrlw $8, %xmm1
1039 ; X86-SSE-NEXT: pand %xmm0, %xmm1
1040 ; X86-SSE-NEXT: movd %xmm1, %eax
1041 ; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
1042 ; X86-SSE-NEXT: movl %ebp, %esp
1043 ; X86-SSE-NEXT: popl %ebp
1044 ; X86-SSE-NEXT: retl
1046 ; X64-SSE-LABEL: test_v64i8:
1048 ; X64-SSE-NEXT: pand %xmm3, %xmm1
1049 ; X64-SSE-NEXT: pand %xmm2, %xmm0
1050 ; X64-SSE-NEXT: pand %xmm1, %xmm0
1051 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1052 ; X64-SSE-NEXT: pand %xmm0, %xmm1
1053 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
1054 ; X64-SSE-NEXT: pand %xmm1, %xmm0
1055 ; X64-SSE-NEXT: movdqa %xmm0, %xmm1
1056 ; X64-SSE-NEXT: psrld $16, %xmm1
1057 ; X64-SSE-NEXT: pand %xmm0, %xmm1
1058 ; X64-SSE-NEXT: movdqa %xmm1, %xmm0
1059 ; X64-SSE-NEXT: psrlw $8, %xmm0
1060 ; X64-SSE-NEXT: pand %xmm1, %xmm0
1061 ; X64-SSE-NEXT: movd %xmm0, %eax
1062 ; X64-SSE-NEXT: # kill: def $al killed $al killed $eax
1063 ; X64-SSE-NEXT: retq
1065 ; AVX1-LABEL: test_v64i8:
1067 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
1068 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1069 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
1070 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
1071 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
1072 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[1,1,1,1]
1073 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
1074 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
1075 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1076 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
1077 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1078 ; AVX1-NEXT: vmovd %xmm0, %eax
1079 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
1080 ; AVX1-NEXT: vzeroupper
1083 ; AVX2-LABEL: test_v64i8:
1085 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
1086 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1087 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1088 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1089 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1090 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1091 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1092 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
1093 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1094 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
1095 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1096 ; AVX2-NEXT: vmovd %xmm0, %eax
1097 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
1098 ; AVX2-NEXT: vzeroupper
1101 ; AVX512-LABEL: test_v64i8:
1103 ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1104 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
1105 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1106 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
1107 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1108 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
1109 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1110 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
1111 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
1112 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
1113 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
1114 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
1115 ; AVX512-NEXT: vmovd %xmm0, %eax
1116 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
1117 ; AVX512-NEXT: vzeroupper
1119 %1 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> %a0)
1123 define i8 @test_v128i8(<128 x i8> %a0) nounwind {
1124 ; X86-SSE-LABEL: test_v128i8:
1126 ; X86-SSE-NEXT: pushl %ebp
1127 ; X86-SSE-NEXT: movl %esp, %ebp
1128 ; X86-SSE-NEXT: andl $-16, %esp
1129 ; X86-SSE-NEXT: subl $16, %esp
1130 ; X86-SSE-NEXT: movdqa 8(%ebp), %xmm3
1131 ; X86-SSE-NEXT: pand 56(%ebp), %xmm2
1132 ; X86-SSE-NEXT: pand 24(%ebp), %xmm0
1133 ; X86-SSE-NEXT: pand %xmm2, %xmm0
1134 ; X86-SSE-NEXT: pand 72(%ebp), %xmm3
1135 ; X86-SSE-NEXT: pand 40(%ebp), %xmm1
1136 ; X86-SSE-NEXT: pand %xmm3, %xmm1
1137 ; X86-SSE-NEXT: pand %xmm0, %xmm1
1138 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1139 ; X86-SSE-NEXT: pand %xmm1, %xmm0
1140 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1141 ; X86-SSE-NEXT: pand %xmm0, %xmm1
1142 ; X86-SSE-NEXT: movdqa %xmm1, %xmm0
1143 ; X86-SSE-NEXT: psrld $16, %xmm0
1144 ; X86-SSE-NEXT: pand %xmm1, %xmm0
1145 ; X86-SSE-NEXT: movdqa %xmm0, %xmm1
1146 ; X86-SSE-NEXT: psrlw $8, %xmm1
1147 ; X86-SSE-NEXT: pand %xmm0, %xmm1
1148 ; X86-SSE-NEXT: movd %xmm1, %eax
1149 ; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
1150 ; X86-SSE-NEXT: movl %ebp, %esp
1151 ; X86-SSE-NEXT: popl %ebp
1152 ; X86-SSE-NEXT: retl
1154 ; X64-SSE-LABEL: test_v128i8:
1156 ; X64-SSE-NEXT: pand %xmm6, %xmm2
1157 ; X64-SSE-NEXT: pand %xmm4, %xmm0
1158 ; X64-SSE-NEXT: pand %xmm2, %xmm0
1159 ; X64-SSE-NEXT: pand %xmm7, %xmm3
1160 ; X64-SSE-NEXT: pand %xmm5, %xmm1
1161 ; X64-SSE-NEXT: pand %xmm3, %xmm1
1162 ; X64-SSE-NEXT: pand %xmm0, %xmm1
1163 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1164 ; X64-SSE-NEXT: pand %xmm1, %xmm0
1165 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1166 ; X64-SSE-NEXT: pand %xmm0, %xmm1
1167 ; X64-SSE-NEXT: movdqa %xmm1, %xmm0
1168 ; X64-SSE-NEXT: psrld $16, %xmm0
1169 ; X64-SSE-NEXT: pand %xmm1, %xmm0
1170 ; X64-SSE-NEXT: movdqa %xmm0, %xmm1
1171 ; X64-SSE-NEXT: psrlw $8, %xmm1
1172 ; X64-SSE-NEXT: pand %xmm0, %xmm1
1173 ; X64-SSE-NEXT: movd %xmm1, %eax
1174 ; X64-SSE-NEXT: # kill: def $al killed $al killed $eax
1175 ; X64-SSE-NEXT: retq
1177 ; AVX1-LABEL: test_v128i8:
1179 ; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
1180 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
1181 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
1182 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1183 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
1184 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
1185 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
1186 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[1,1,1,1]
1187 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
1188 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
1189 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1190 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
1191 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1192 ; AVX1-NEXT: vmovd %xmm0, %eax
1193 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
1194 ; AVX1-NEXT: vzeroupper
1197 ; AVX2-LABEL: test_v128i8:
1199 ; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
1200 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
1201 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
1202 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1203 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1204 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1205 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1206 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1207 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1208 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
1209 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1210 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
1211 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1212 ; AVX2-NEXT: vmovd %xmm0, %eax
1213 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
1214 ; AVX2-NEXT: vzeroupper
1217 ; AVX512-LABEL: test_v128i8:
1219 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
1220 ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1221 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
1222 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1223 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
1224 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1225 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
1226 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1227 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
1228 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
1229 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
1230 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
1231 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
1232 ; AVX512-NEXT: vmovd %xmm0, %eax
1233 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
1234 ; AVX512-NEXT: vzeroupper
1236 %1 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> %a0)
; Declarations of the vector reduce-and intrinsics exercised by the tests in
; this file, grouped by scalar result width. These are overloads of the generic
; llvm.vector.reduce.and intrinsic; each performs a horizontal bitwise AND
; across all elements of its vector operand and yields a single scalar.

; i64 results, from 2 up to 16 lanes.
declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.and.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.and.v16i64(<16 x i64>)

; i32 results, from 2 up to 32 lanes.
declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.and.v16i32(<16 x i32>)
declare i32 @llvm.vector.reduce.and.v32i32(<32 x i32>)

; i16 results, from 2 up to 64 lanes.
declare i16 @llvm.vector.reduce.and.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
declare i16 @llvm.vector.reduce.and.v32i16(<32 x i16>)
declare i16 @llvm.vector.reduce.and.v64i16(<64 x i16>)

; i8 results, from 2 up to 128 lanes.
declare i8 @llvm.vector.reduce.and.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.and.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>)
declare i8 @llvm.vector.reduce.and.v64i8(<64 x i8>)
declare i8 @llvm.vector.reduce.and.v128i8(<128 x i8>)