1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL
; Load a <2 x i64> vector from %ptr, AND its two lanes together as scalars and
; compare the result with -1. %vcheck is therefore true only when every bit of
; the loaded 128-bit vector is set.
14 define i1 @test_v2i64(ptr %ptr) nounwind {
15 ; SSE2-LABEL: test_v2i64:
17 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
18 ; SSE2-NEXT: pcmpeqd (%rdi), %xmm0
19 ; SSE2-NEXT: movmskps %xmm0, %eax
20 ; SSE2-NEXT: xorl $15, %eax
24 ; SSE41-LABEL: test_v2i64:
26 ; SSE41-NEXT: movdqa (%rdi), %xmm0
27 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
28 ; SSE41-NEXT: ptest %xmm1, %xmm0
29 ; SSE41-NEXT: setb %al
32 ; AVX-LABEL: test_v2i64:
34 ; AVX-NEXT: vmovdqa (%rdi), %xmm0
35 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
36 ; AVX-NEXT: vptest %xmm1, %xmm0
; Scalarized reduction pattern that the backend is expected to recognise.
39 %vload = load <2 x i64>, ptr %ptr
40 %v0 = extractelement <2 x i64> %vload, i32 0
41 %v1 = extractelement <2 x i64> %vload, i32 1
42 %vreduce = and i64 %v0, %v1
; All-ones test on the reduced value.
43 %vcheck = icmp eq i64 %vreduce, -1
; Load a <4 x i64> vector from %ptr, AND its four lanes together with a
; two-level scalar tree and compare the result with -1. %vcheck is true only
; when every bit of the loaded 256-bit vector is set.
47 define i1 @test_v4i64(ptr %ptr) nounwind {
48 ; SSE2-LABEL: test_v4i64:
50 ; SSE2-NEXT: movdqa (%rdi), %xmm0
51 ; SSE2-NEXT: pand 16(%rdi), %xmm0
52 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
53 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
54 ; SSE2-NEXT: movmskps %xmm1, %eax
55 ; SSE2-NEXT: xorl $15, %eax
59 ; SSE41-LABEL: test_v4i64:
61 ; SSE41-NEXT: movdqa (%rdi), %xmm0
62 ; SSE41-NEXT: pand 16(%rdi), %xmm0
63 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
64 ; SSE41-NEXT: ptest %xmm1, %xmm0
65 ; SSE41-NEXT: setb %al
68 ; AVX1-LABEL: test_v4i64:
70 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
71 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
72 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
73 ; AVX1-NEXT: vptest %ymm1, %ymm0
75 ; AVX1-NEXT: vzeroupper
78 ; AVX2-LABEL: test_v4i64:
80 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
81 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
82 ; AVX2-NEXT: vptest %ymm1, %ymm0
84 ; AVX2-NEXT: vzeroupper
87 ; AVX512-LABEL: test_v4i64:
89 ; AVX512-NEXT: vmovdqa (%rdi), %ymm0
90 ; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
91 ; AVX512-NEXT: vptest %ymm1, %ymm0
92 ; AVX512-NEXT: setb %al
93 ; AVX512-NEXT: vzeroupper
; Extract every lane as a scalar.
95 %vload = load <4 x i64>, ptr %ptr
96 %v0 = extractelement <4 x i64> %vload, i32 0
97 %v1 = extractelement <4 x i64> %vload, i32 1
98 %v2 = extractelement <4 x i64> %vload, i32 2
99 %v3 = extractelement <4 x i64> %vload, i32 3
; Pairwise AND tree over the extracted lanes.
100 %vreduce01 = and i64 %v0, %v1
101 %vreduce23 = and i64 %v2, %v3
102 %vreduce = and i64 %vreduce01, %vreduce23
; All-ones test on the reduced value.
103 %vcheck = icmp eq i64 %vreduce, -1
; Load an <8 x i64> vector from %ptr, AND its eight lanes together with a
; three-level scalar tree and compare the result with -1. %vcheck is true only
; when every bit of the loaded 512-bit vector is set.
107 define i1 @test_v8i64(ptr %ptr) nounwind {
108 ; SSE2-LABEL: test_v8i64:
110 ; SSE2-NEXT: movdqa (%rdi), %xmm0
111 ; SSE2-NEXT: movdqa 16(%rdi), %xmm1
112 ; SSE2-NEXT: pand 48(%rdi), %xmm1
113 ; SSE2-NEXT: pand 32(%rdi), %xmm0
114 ; SSE2-NEXT: pand %xmm1, %xmm0
115 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
116 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
117 ; SSE2-NEXT: movmskps %xmm1, %eax
118 ; SSE2-NEXT: xorl $15, %eax
119 ; SSE2-NEXT: sete %al
122 ; SSE41-LABEL: test_v8i64:
124 ; SSE41-NEXT: movdqa (%rdi), %xmm0
125 ; SSE41-NEXT: movdqa 16(%rdi), %xmm1
126 ; SSE41-NEXT: pand 48(%rdi), %xmm1
127 ; SSE41-NEXT: pand 32(%rdi), %xmm0
128 ; SSE41-NEXT: pand %xmm1, %xmm0
129 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
130 ; SSE41-NEXT: ptest %xmm1, %xmm0
131 ; SSE41-NEXT: setb %al
134 ; AVX1-LABEL: test_v8i64:
136 ; AVX1-NEXT: vmovaps (%rdi), %ymm0
137 ; AVX1-NEXT: vandps 32(%rdi), %ymm0, %ymm0
138 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
139 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
140 ; AVX1-NEXT: vptest %ymm1, %ymm0
141 ; AVX1-NEXT: setb %al
142 ; AVX1-NEXT: vzeroupper
145 ; AVX2-LABEL: test_v8i64:
147 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
148 ; AVX2-NEXT: vpand 32(%rdi), %ymm0, %ymm0
149 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
150 ; AVX2-NEXT: vptest %ymm1, %ymm0
151 ; AVX2-NEXT: setb %al
152 ; AVX2-NEXT: vzeroupper
155 ; AVX512-LABEL: test_v8i64:
157 ; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
158 ; AVX512-NEXT: vpcmpneqd (%rdi), %zmm0, %k0
159 ; AVX512-NEXT: kortestw %k0, %k0
160 ; AVX512-NEXT: sete %al
161 ; AVX512-NEXT: vzeroupper
; Extract every lane as a scalar.
163 %vload = load <8 x i64>, ptr %ptr
164 %v0 = extractelement <8 x i64> %vload, i32 0
165 %v1 = extractelement <8 x i64> %vload, i32 1
166 %v2 = extractelement <8 x i64> %vload, i32 2
167 %v3 = extractelement <8 x i64> %vload, i32 3
168 %v4 = extractelement <8 x i64> %vload, i32 4
169 %v5 = extractelement <8 x i64> %vload, i32 5
170 %v6 = extractelement <8 x i64> %vload, i32 6
171 %v7 = extractelement <8 x i64> %vload, i32 7
; Pairwise AND tree over the extracted lanes.
172 %vreduce01 = and i64 %v0, %v1
173 %vreduce23 = and i64 %v2, %v3
174 %vreduce45 = and i64 %v4, %v5
175 %vreduce67 = and i64 %v6, %v7
176 %vreduce0123 = and i64 %vreduce01, %vreduce23
177 %vreduce4567 = and i64 %vreduce45, %vreduce67
178 %vreduce = and i64 %vreduce0123, %vreduce4567
; All-ones test on the reduced value.
179 %vcheck = icmp eq i64 %vreduce, -1
; Load a <16 x i64> vector from %ptr, AND its sixteen lanes together with a
; four-level scalar tree and compare the result with -1. %vcheck is true only
; when every bit of the loaded 1024-bit vector is set.
183 define i1 @test_v16i64(ptr %ptr) nounwind {
184 ; SSE2-LABEL: test_v16i64:
186 ; SSE2-NEXT: movdqa (%rdi), %xmm0
187 ; SSE2-NEXT: movdqa 16(%rdi), %xmm1
188 ; SSE2-NEXT: movdqa 32(%rdi), %xmm2
189 ; SSE2-NEXT: movdqa 48(%rdi), %xmm3
190 ; SSE2-NEXT: pand 112(%rdi), %xmm3
191 ; SSE2-NEXT: pand 80(%rdi), %xmm1
192 ; SSE2-NEXT: pand %xmm3, %xmm1
193 ; SSE2-NEXT: pand 96(%rdi), %xmm2
194 ; SSE2-NEXT: pand 64(%rdi), %xmm0
195 ; SSE2-NEXT: pand %xmm2, %xmm0
196 ; SSE2-NEXT: pand %xmm1, %xmm0
197 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
198 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
199 ; SSE2-NEXT: movmskps %xmm1, %eax
200 ; SSE2-NEXT: xorl $15, %eax
201 ; SSE2-NEXT: sete %al
204 ; SSE41-LABEL: test_v16i64:
206 ; SSE41-NEXT: movdqa (%rdi), %xmm0
207 ; SSE41-NEXT: movdqa 16(%rdi), %xmm1
208 ; SSE41-NEXT: movdqa 32(%rdi), %xmm2
209 ; SSE41-NEXT: movdqa 48(%rdi), %xmm3
210 ; SSE41-NEXT: pand 112(%rdi), %xmm3
211 ; SSE41-NEXT: pand 80(%rdi), %xmm1
212 ; SSE41-NEXT: pand %xmm3, %xmm1
213 ; SSE41-NEXT: pand 96(%rdi), %xmm2
214 ; SSE41-NEXT: pand 64(%rdi), %xmm0
215 ; SSE41-NEXT: pand %xmm2, %xmm0
216 ; SSE41-NEXT: pand %xmm1, %xmm0
217 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
218 ; SSE41-NEXT: ptest %xmm1, %xmm0
219 ; SSE41-NEXT: setb %al
222 ; AVX1-LABEL: test_v16i64:
224 ; AVX1-NEXT: vmovaps (%rdi), %ymm0
225 ; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
226 ; AVX1-NEXT: vandps 96(%rdi), %ymm1, %ymm1
227 ; AVX1-NEXT: vandps 64(%rdi), %ymm0, %ymm0
228 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
229 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
230 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
231 ; AVX1-NEXT: vptest %ymm1, %ymm0
232 ; AVX1-NEXT: setb %al
233 ; AVX1-NEXT: vzeroupper
236 ; AVX2-LABEL: test_v16i64:
238 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
239 ; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1
240 ; AVX2-NEXT: vpand 96(%rdi), %ymm1, %ymm1
241 ; AVX2-NEXT: vpand 64(%rdi), %ymm0, %ymm0
242 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
243 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
244 ; AVX2-NEXT: vptest %ymm1, %ymm0
245 ; AVX2-NEXT: setb %al
246 ; AVX2-NEXT: vzeroupper
249 ; AVX512-LABEL: test_v16i64:
251 ; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
252 ; AVX512-NEXT: vpandq 64(%rdi), %zmm0, %zmm0
253 ; AVX512-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
254 ; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
255 ; AVX512-NEXT: kortestw %k0, %k0
256 ; AVX512-NEXT: sete %al
257 ; AVX512-NEXT: vzeroupper
; Extract every lane as a scalar.
259 %vload = load <16 x i64>, ptr %ptr
260 %v0 = extractelement <16 x i64> %vload, i32 0
261 %v1 = extractelement <16 x i64> %vload, i32 1
262 %v2 = extractelement <16 x i64> %vload, i32 2
263 %v3 = extractelement <16 x i64> %vload, i32 3
264 %v4 = extractelement <16 x i64> %vload, i32 4
265 %v5 = extractelement <16 x i64> %vload, i32 5
266 %v6 = extractelement <16 x i64> %vload, i32 6
267 %v7 = extractelement <16 x i64> %vload, i32 7
268 %v8 = extractelement <16 x i64> %vload, i32 8
269 %v9 = extractelement <16 x i64> %vload, i32 9
270 %v10 = extractelement <16 x i64> %vload, i32 10
271 %v11 = extractelement <16 x i64> %vload, i32 11
272 %v12 = extractelement <16 x i64> %vload, i32 12
273 %v13 = extractelement <16 x i64> %vload, i32 13
274 %v14 = extractelement <16 x i64> %vload, i32 14
275 %v15 = extractelement <16 x i64> %vload, i32 15
; Pairwise AND tree over the extracted lanes.
276 %vreduce01 = and i64 %v0, %v1
277 %vreduce23 = and i64 %v2, %v3
278 %vreduce45 = and i64 %v4, %v5
279 %vreduce67 = and i64 %v6, %v7
280 %vreduce89 = and i64 %v8, %v9
281 %vreduce1011 = and i64 %v10, %v11
282 %vreduce1213 = and i64 %v12, %v13
283 %vreduce1415 = and i64 %v14, %v15
284 %vreduce0123 = and i64 %vreduce01, %vreduce23
285 %vreduce4567 = and i64 %vreduce45, %vreduce67
286 %vreduce891011 = and i64 %vreduce89, %vreduce1011
287 %vreduce12131415 = and i64 %vreduce1213, %vreduce1415
288 %vreduce01234567 = and i64 %vreduce0123, %vreduce4567
289 %vreduce89101112131415 = and i64 %vreduce891011, %vreduce12131415
290 %vreduce = and i64 %vreduce01234567, %vreduce89101112131415
; All-ones test on the reduced value.
291 %vcheck = icmp eq i64 %vreduce, -1
; Load a <2 x i32> vector from %ptr, AND its two lanes together as scalars and
; compare the result with -1. The vector is only 64 bits wide, and the checks
; for every RUN configuration expect a single 64-bit compare of the memory
; operand against -1 (cmpq).
299 define i1 @test_v2i32(ptr %ptr) nounwind {
300 ; SSE-LABEL: test_v2i32:
302 ; SSE-NEXT: cmpq $-1, (%rdi)
306 ; AVX-LABEL: test_v2i32:
308 ; AVX-NEXT: cmpq $-1, (%rdi)
; Scalarized reduction pattern that the backend is expected to recognise.
311 %vload = load <2 x i32>, ptr %ptr
312 %v0 = extractelement <2 x i32> %vload, i32 0
313 %v1 = extractelement <2 x i32> %vload, i32 1
314 %vreduce = and i32 %v0, %v1
; All-ones test on the reduced value.
315 %vcheck = icmp eq i32 %vreduce, -1
; Load a <4 x i32> vector from %ptr, AND its four lanes together with a
; two-level scalar tree and compare the result with -1. %vcheck is true only
; when every bit of the loaded 128-bit vector is set.
319 define i1 @test_v4i32(ptr %ptr) nounwind {
320 ; SSE2-LABEL: test_v4i32:
322 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
323 ; SSE2-NEXT: pcmpeqd (%rdi), %xmm0
324 ; SSE2-NEXT: movmskps %xmm0, %eax
325 ; SSE2-NEXT: xorl $15, %eax
326 ; SSE2-NEXT: sete %al
329 ; SSE41-LABEL: test_v4i32:
331 ; SSE41-NEXT: movdqa (%rdi), %xmm0
332 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
333 ; SSE41-NEXT: ptest %xmm1, %xmm0
334 ; SSE41-NEXT: setb %al
337 ; AVX-LABEL: test_v4i32:
339 ; AVX-NEXT: vmovdqa (%rdi), %xmm0
340 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
341 ; AVX-NEXT: vptest %xmm1, %xmm0
; Extract every lane as a scalar.
344 %vload = load <4 x i32>, ptr %ptr
345 %v0 = extractelement <4 x i32> %vload, i32 0
346 %v1 = extractelement <4 x i32> %vload, i32 1
347 %v2 = extractelement <4 x i32> %vload, i32 2
348 %v3 = extractelement <4 x i32> %vload, i32 3
; Pairwise AND tree over the extracted lanes.
349 %vreduce01 = and i32 %v0, %v1
350 %vreduce23 = and i32 %v2, %v3
351 %vreduce = and i32 %vreduce01, %vreduce23
; All-ones test on the reduced value.
352 %vcheck = icmp eq i32 %vreduce, -1
; Load an <8 x i32> vector from %ptr, AND its eight lanes together with a
; three-level scalar tree and compare the result with -1. %vcheck is true only
; when every bit of the loaded 256-bit vector is set.
356 define i1 @test_v8i32(ptr %ptr) nounwind {
357 ; SSE2-LABEL: test_v8i32:
359 ; SSE2-NEXT: movdqa (%rdi), %xmm0
360 ; SSE2-NEXT: pand 16(%rdi), %xmm0
361 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
362 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
363 ; SSE2-NEXT: movmskps %xmm1, %eax
364 ; SSE2-NEXT: xorl $15, %eax
365 ; SSE2-NEXT: sete %al
368 ; SSE41-LABEL: test_v8i32:
370 ; SSE41-NEXT: movdqa (%rdi), %xmm0
371 ; SSE41-NEXT: pand 16(%rdi), %xmm0
372 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
373 ; SSE41-NEXT: ptest %xmm1, %xmm0
374 ; SSE41-NEXT: setb %al
377 ; AVX1-LABEL: test_v8i32:
379 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
380 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
381 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
382 ; AVX1-NEXT: vptest %ymm1, %ymm0
383 ; AVX1-NEXT: setb %al
384 ; AVX1-NEXT: vzeroupper
387 ; AVX2-LABEL: test_v8i32:
389 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
390 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
391 ; AVX2-NEXT: vptest %ymm1, %ymm0
392 ; AVX2-NEXT: setb %al
393 ; AVX2-NEXT: vzeroupper
396 ; AVX512-LABEL: test_v8i32:
398 ; AVX512-NEXT: vmovdqa (%rdi), %ymm0
399 ; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
400 ; AVX512-NEXT: vptest %ymm1, %ymm0
401 ; AVX512-NEXT: setb %al
402 ; AVX512-NEXT: vzeroupper
; Extract every lane as a scalar.
404 %vload = load <8 x i32>, ptr %ptr
405 %v0 = extractelement <8 x i32> %vload, i32 0
406 %v1 = extractelement <8 x i32> %vload, i32 1
407 %v2 = extractelement <8 x i32> %vload, i32 2
408 %v3 = extractelement <8 x i32> %vload, i32 3
409 %v4 = extractelement <8 x i32> %vload, i32 4
410 %v5 = extractelement <8 x i32> %vload, i32 5
411 %v6 = extractelement <8 x i32> %vload, i32 6
412 %v7 = extractelement <8 x i32> %vload, i32 7
; Pairwise AND tree over the extracted lanes.
413 %vreduce01 = and i32 %v0, %v1
414 %vreduce23 = and i32 %v2, %v3
415 %vreduce45 = and i32 %v4, %v5
416 %vreduce67 = and i32 %v6, %v7
417 %vreduce0123 = and i32 %vreduce01, %vreduce23
418 %vreduce4567 = and i32 %vreduce45, %vreduce67
419 %vreduce = and i32 %vreduce0123, %vreduce4567
; All-ones test on the reduced value.
420 %vcheck = icmp eq i32 %vreduce, -1
; Load a <16 x i32> vector from %ptr, AND its sixteen lanes together with a
; four-level scalar tree and compare the result with -1. %vcheck is true only
; when every bit of the loaded 512-bit vector is set.
424 define i1 @test_v16i32(ptr %ptr) nounwind {
425 ; SSE2-LABEL: test_v16i32:
427 ; SSE2-NEXT: movdqa (%rdi), %xmm0
428 ; SSE2-NEXT: movdqa 16(%rdi), %xmm1
429 ; SSE2-NEXT: pand 48(%rdi), %xmm1
430 ; SSE2-NEXT: pand 32(%rdi), %xmm0
431 ; SSE2-NEXT: pand %xmm1, %xmm0
432 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
433 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
434 ; SSE2-NEXT: movmskps %xmm1, %eax
435 ; SSE2-NEXT: xorl $15, %eax
436 ; SSE2-NEXT: sete %al
439 ; SSE41-LABEL: test_v16i32:
441 ; SSE41-NEXT: movdqa (%rdi), %xmm0
442 ; SSE41-NEXT: movdqa 16(%rdi), %xmm1
443 ; SSE41-NEXT: pand 48(%rdi), %xmm1
444 ; SSE41-NEXT: pand 32(%rdi), %xmm0
445 ; SSE41-NEXT: pand %xmm1, %xmm0
446 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
447 ; SSE41-NEXT: ptest %xmm1, %xmm0
448 ; SSE41-NEXT: setb %al
451 ; AVX1-LABEL: test_v16i32:
453 ; AVX1-NEXT: vmovaps (%rdi), %ymm0
454 ; AVX1-NEXT: vandps 32(%rdi), %ymm0, %ymm0
455 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
456 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
457 ; AVX1-NEXT: vptest %ymm1, %ymm0
458 ; AVX1-NEXT: setb %al
459 ; AVX1-NEXT: vzeroupper
462 ; AVX2-LABEL: test_v16i32:
464 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
465 ; AVX2-NEXT: vpand 32(%rdi), %ymm0, %ymm0
466 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
467 ; AVX2-NEXT: vptest %ymm1, %ymm0
468 ; AVX2-NEXT: setb %al
469 ; AVX2-NEXT: vzeroupper
472 ; AVX512-LABEL: test_v16i32:
474 ; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
475 ; AVX512-NEXT: vpcmpneqd (%rdi), %zmm0, %k0
476 ; AVX512-NEXT: kortestw %k0, %k0
477 ; AVX512-NEXT: sete %al
478 ; AVX512-NEXT: vzeroupper
; Extract every lane as a scalar.
480 %vload = load <16 x i32>, ptr %ptr
481 %v0 = extractelement <16 x i32> %vload, i32 0
482 %v1 = extractelement <16 x i32> %vload, i32 1
483 %v2 = extractelement <16 x i32> %vload, i32 2
484 %v3 = extractelement <16 x i32> %vload, i32 3
485 %v4 = extractelement <16 x i32> %vload, i32 4
486 %v5 = extractelement <16 x i32> %vload, i32 5
487 %v6 = extractelement <16 x i32> %vload, i32 6
488 %v7 = extractelement <16 x i32> %vload, i32 7
489 %v8 = extractelement <16 x i32> %vload, i32 8
490 %v9 = extractelement <16 x i32> %vload, i32 9
491 %v10 = extractelement <16 x i32> %vload, i32 10
492 %v11 = extractelement <16 x i32> %vload, i32 11
493 %v12 = extractelement <16 x i32> %vload, i32 12
494 %v13 = extractelement <16 x i32> %vload, i32 13
495 %v14 = extractelement <16 x i32> %vload, i32 14
496 %v15 = extractelement <16 x i32> %vload, i32 15
; Pairwise AND tree over the extracted lanes.
497 %vreduce01 = and i32 %v0, %v1
498 %vreduce23 = and i32 %v2, %v3
499 %vreduce45 = and i32 %v4, %v5
500 %vreduce67 = and i32 %v6, %v7
501 %vreduce89 = and i32 %v8, %v9
502 %vreduce1011 = and i32 %v10, %v11
503 %vreduce1213 = and i32 %v12, %v13
504 %vreduce1415 = and i32 %v14, %v15
505 %vreduce0123 = and i32 %vreduce01, %vreduce23
506 %vreduce4567 = and i32 %vreduce45, %vreduce67
507 %vreduce891011 = and i32 %vreduce89, %vreduce1011
508 %vreduce12131415 = and i32 %vreduce1213, %vreduce1415
509 %vreduce01234567 = and i32 %vreduce0123, %vreduce4567
510 %vreduce89101112131415 = and i32 %vreduce891011, %vreduce12131415
511 %vreduce = and i32 %vreduce01234567, %vreduce89101112131415
; All-ones test on the reduced value.
512 %vcheck = icmp eq i32 %vreduce, -1
; Load a <2 x i16> vector from %ptr, AND its two lanes together as scalars and
; compare the result with -1. The vector is only 32 bits wide, and the checks
; for every RUN configuration expect a single 32-bit compare of the memory
; operand against -1 (cmpl).
520 define i1 @test_v2i16(ptr %ptr) nounwind {
521 ; SSE-LABEL: test_v2i16:
523 ; SSE-NEXT: cmpl $-1, (%rdi)
527 ; AVX-LABEL: test_v2i16:
529 ; AVX-NEXT: cmpl $-1, (%rdi)
; Scalarized reduction pattern that the backend is expected to recognise.
532 %vload = load <2 x i16>, ptr %ptr
533 %v0 = extractelement <2 x i16> %vload, i32 0
534 %v1 = extractelement <2 x i16> %vload, i32 1
535 %vreduce = and i16 %v0, %v1
; All-ones test on the reduced value.
536 %vcheck = icmp eq i16 %vreduce, -1
; Load a <4 x i16> vector from %ptr, AND its four lanes together with a
; two-level scalar tree and compare the result with -1. The vector is only
; 64 bits wide, and the checks for every RUN configuration expect a single
; 64-bit compare of the memory operand against -1 (cmpq).
540 define i1 @test_v4i16(ptr %ptr) nounwind {
541 ; SSE-LABEL: test_v4i16:
543 ; SSE-NEXT: cmpq $-1, (%rdi)
547 ; AVX-LABEL: test_v4i16:
549 ; AVX-NEXT: cmpq $-1, (%rdi)
; Extract every lane as a scalar.
552 %vload = load <4 x i16>, ptr %ptr
553 %v0 = extractelement <4 x i16> %vload, i32 0
554 %v1 = extractelement <4 x i16> %vload, i32 1
555 %v2 = extractelement <4 x i16> %vload, i32 2
556 %v3 = extractelement <4 x i16> %vload, i32 3
; Pairwise AND tree over the extracted lanes.
557 %vreduce01 = and i16 %v0, %v1
558 %vreduce23 = and i16 %v2, %v3
559 %vreduce = and i16 %vreduce01, %vreduce23
; All-ones test on the reduced value.
560 %vcheck = icmp eq i16 %vreduce, -1
; Load an <8 x i16> vector from %ptr, AND its eight lanes together with a
; three-level scalar tree and compare the result with -1. %vcheck is true only
; when every bit of the loaded 128-bit vector is set.
564 define i1 @test_v8i16(ptr %ptr) nounwind {
565 ; SSE2-LABEL: test_v8i16:
567 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
568 ; SSE2-NEXT: pcmpeqb (%rdi), %xmm0
569 ; SSE2-NEXT: pmovmskb %xmm0, %eax
570 ; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
571 ; SSE2-NEXT: sete %al
574 ; SSE41-LABEL: test_v8i16:
576 ; SSE41-NEXT: movdqa (%rdi), %xmm0
577 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
578 ; SSE41-NEXT: ptest %xmm1, %xmm0
579 ; SSE41-NEXT: setb %al
582 ; AVX-LABEL: test_v8i16:
584 ; AVX-NEXT: vmovdqa (%rdi), %xmm0
585 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
586 ; AVX-NEXT: vptest %xmm1, %xmm0
; Extract every lane as a scalar.
589 %vload = load <8 x i16>, ptr %ptr
590 %v0 = extractelement <8 x i16> %vload, i32 0
591 %v1 = extractelement <8 x i16> %vload, i32 1
592 %v2 = extractelement <8 x i16> %vload, i32 2
593 %v3 = extractelement <8 x i16> %vload, i32 3
594 %v4 = extractelement <8 x i16> %vload, i32 4
595 %v5 = extractelement <8 x i16> %vload, i32 5
596 %v6 = extractelement <8 x i16> %vload, i32 6
597 %v7 = extractelement <8 x i16> %vload, i32 7
; Pairwise AND tree over the extracted lanes.
598 %vreduce01 = and i16 %v0, %v1
599 %vreduce23 = and i16 %v2, %v3
600 %vreduce45 = and i16 %v4, %v5
601 %vreduce67 = and i16 %v6, %v7
602 %vreduce0123 = and i16 %vreduce01, %vreduce23
603 %vreduce4567 = and i16 %vreduce45, %vreduce67
604 %vreduce = and i16 %vreduce0123, %vreduce4567
; All-ones test on the reduced value.
605 %vcheck = icmp eq i16 %vreduce, -1
; Load a <16 x i16> vector from %ptr, AND its sixteen lanes together with a
; four-level scalar tree and compare the result with -1. %vcheck is true only
; when every bit of the loaded 256-bit vector is set.
609 define i1 @test_v16i16(ptr %ptr) nounwind {
610 ; SSE2-LABEL: test_v16i16:
612 ; SSE2-NEXT: movdqa (%rdi), %xmm0
613 ; SSE2-NEXT: pand 16(%rdi), %xmm0
614 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
615 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
616 ; SSE2-NEXT: pmovmskb %xmm1, %eax
617 ; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
618 ; SSE2-NEXT: sete %al
621 ; SSE41-LABEL: test_v16i16:
623 ; SSE41-NEXT: movdqa (%rdi), %xmm0
624 ; SSE41-NEXT: pand 16(%rdi), %xmm0
625 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
626 ; SSE41-NEXT: ptest %xmm1, %xmm0
627 ; SSE41-NEXT: setb %al
630 ; AVX1-LABEL: test_v16i16:
632 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
633 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
634 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
635 ; AVX1-NEXT: vptest %ymm1, %ymm0
636 ; AVX1-NEXT: setb %al
637 ; AVX1-NEXT: vzeroupper
640 ; AVX2-LABEL: test_v16i16:
642 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
643 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
644 ; AVX2-NEXT: vptest %ymm1, %ymm0
645 ; AVX2-NEXT: setb %al
646 ; AVX2-NEXT: vzeroupper
649 ; AVX512-LABEL: test_v16i16:
651 ; AVX512-NEXT: vmovdqa (%rdi), %ymm0
652 ; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
653 ; AVX512-NEXT: vptest %ymm1, %ymm0
654 ; AVX512-NEXT: setb %al
655 ; AVX512-NEXT: vzeroupper
; Extract every lane as a scalar.
657 %vload = load <16 x i16>, ptr %ptr
658 %v0 = extractelement <16 x i16> %vload, i32 0
659 %v1 = extractelement <16 x i16> %vload, i32 1
660 %v2 = extractelement <16 x i16> %vload, i32 2
661 %v3 = extractelement <16 x i16> %vload, i32 3
662 %v4 = extractelement <16 x i16> %vload, i32 4
663 %v5 = extractelement <16 x i16> %vload, i32 5
664 %v6 = extractelement <16 x i16> %vload, i32 6
665 %v7 = extractelement <16 x i16> %vload, i32 7
666 %v8 = extractelement <16 x i16> %vload, i32 8
667 %v9 = extractelement <16 x i16> %vload, i32 9
668 %v10 = extractelement <16 x i16> %vload, i32 10
669 %v11 = extractelement <16 x i16> %vload, i32 11
670 %v12 = extractelement <16 x i16> %vload, i32 12
671 %v13 = extractelement <16 x i16> %vload, i32 13
672 %v14 = extractelement <16 x i16> %vload, i32 14
673 %v15 = extractelement <16 x i16> %vload, i32 15
; Pairwise AND tree over the extracted lanes.
674 %vreduce01 = and i16 %v0, %v1
675 %vreduce23 = and i16 %v2, %v3
676 %vreduce45 = and i16 %v4, %v5
677 %vreduce67 = and i16 %v6, %v7
678 %vreduce89 = and i16 %v8, %v9
679 %vreduce1011 = and i16 %v10, %v11
680 %vreduce1213 = and i16 %v12, %v13
681 %vreduce1415 = and i16 %v14, %v15
682 %vreduce0123 = and i16 %vreduce01, %vreduce23
683 %vreduce4567 = and i16 %vreduce45, %vreduce67
684 %vreduce891011 = and i16 %vreduce89, %vreduce1011
685 %vreduce12131415 = and i16 %vreduce1213, %vreduce1415
686 %vreduce01234567 = and i16 %vreduce0123, %vreduce4567
687 %vreduce89101112131415 = and i16 %vreduce891011, %vreduce12131415
688 %vreduce = and i16 %vreduce01234567, %vreduce89101112131415
; All-ones test on the reduced value.
689 %vcheck = icmp eq i16 %vreduce, -1
; Load a <2 x i8> vector from %ptr, AND its two lanes together as scalars and
; compare the result with -1. The vector is only 16 bits wide, and the checks
; for every RUN configuration expect a single 16-bit compare of the memory
; operand against -1 (cmpw).
697 define i1 @test_v2i8(ptr %ptr) nounwind {
698 ; SSE-LABEL: test_v2i8:
700 ; SSE-NEXT: cmpw $-1, (%rdi)
704 ; AVX-LABEL: test_v2i8:
706 ; AVX-NEXT: cmpw $-1, (%rdi)
; Scalarized reduction pattern that the backend is expected to recognise.
709 %vload = load <2 x i8>, ptr %ptr
710 %v0 = extractelement <2 x i8> %vload, i32 0
711 %v1 = extractelement <2 x i8> %vload, i32 1
712 %vreduce = and i8 %v0, %v1
; All-ones test on the reduced value.
713 %vcheck = icmp eq i8 %vreduce, -1
; Load a <4 x i8> vector from %ptr, AND its four lanes together with a
; two-level scalar tree and compare the result with -1. The vector is only
; 32 bits wide, and the checks for every RUN configuration expect a single
; 32-bit compare of the memory operand against -1 (cmpl).
717 define i1 @test_v4i8(ptr %ptr) nounwind {
718 ; SSE-LABEL: test_v4i8:
720 ; SSE-NEXT: cmpl $-1, (%rdi)
724 ; AVX-LABEL: test_v4i8:
726 ; AVX-NEXT: cmpl $-1, (%rdi)
; Extract every lane as a scalar.
729 %vload = load <4 x i8>, ptr %ptr
730 %v0 = extractelement <4 x i8> %vload, i32 0
731 %v1 = extractelement <4 x i8> %vload, i32 1
732 %v2 = extractelement <4 x i8> %vload, i32 2
733 %v3 = extractelement <4 x i8> %vload, i32 3
; Pairwise AND tree over the extracted lanes.
734 %vreduce01 = and i8 %v0, %v1
735 %vreduce23 = and i8 %v2, %v3
736 %vreduce = and i8 %vreduce01, %vreduce23
; All-ones test on the reduced value.
737 %vcheck = icmp eq i8 %vreduce, -1
; Load an <8 x i8> vector from %ptr, AND its eight lanes together with a
; three-level scalar tree and compare the result with -1. The vector is only
; 64 bits wide, and the checks for every RUN configuration expect a single
; 64-bit compare of the memory operand against -1 (cmpq).
741 define i1 @test_v8i8(ptr %ptr) nounwind {
742 ; SSE-LABEL: test_v8i8:
744 ; SSE-NEXT: cmpq $-1, (%rdi)
748 ; AVX-LABEL: test_v8i8:
750 ; AVX-NEXT: cmpq $-1, (%rdi)
; Extract every lane as a scalar.
753 %vload = load <8 x i8>, ptr %ptr
754 %v0 = extractelement <8 x i8> %vload, i32 0
755 %v1 = extractelement <8 x i8> %vload, i32 1
756 %v2 = extractelement <8 x i8> %vload, i32 2
757 %v3 = extractelement <8 x i8> %vload, i32 3
758 %v4 = extractelement <8 x i8> %vload, i32 4
759 %v5 = extractelement <8 x i8> %vload, i32 5
760 %v6 = extractelement <8 x i8> %vload, i32 6
761 %v7 = extractelement <8 x i8> %vload, i32 7
; Pairwise AND tree over the extracted lanes.
762 %vreduce01 = and i8 %v0, %v1
763 %vreduce23 = and i8 %v2, %v3
764 %vreduce45 = and i8 %v4, %v5
765 %vreduce67 = and i8 %v6, %v7
766 %vreduce0123 = and i8 %vreduce01, %vreduce23
767 %vreduce4567 = and i8 %vreduce45, %vreduce67
768 %vreduce = and i8 %vreduce0123, %vreduce4567
; All-ones test on the reduced value.
769 %vcheck = icmp eq i8 %vreduce, -1
; Load a <16 x i8> vector from %ptr, AND its sixteen lanes together with a
; four-level scalar tree and compare the result with -1. %vcheck is true only
; when every bit of the loaded 128-bit vector is set.
773 define i1 @test_v16i8(ptr %ptr) nounwind {
774 ; SSE2-LABEL: test_v16i8:
776 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
777 ; SSE2-NEXT: pcmpeqb (%rdi), %xmm0
778 ; SSE2-NEXT: pmovmskb %xmm0, %eax
779 ; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
780 ; SSE2-NEXT: sete %al
783 ; SSE41-LABEL: test_v16i8:
785 ; SSE41-NEXT: movdqa (%rdi), %xmm0
786 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
787 ; SSE41-NEXT: ptest %xmm1, %xmm0
788 ; SSE41-NEXT: setb %al
791 ; AVX-LABEL: test_v16i8:
793 ; AVX-NEXT: vmovdqa (%rdi), %xmm0
794 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
795 ; AVX-NEXT: vptest %xmm1, %xmm0
; Extract every lane as a scalar.
798 %vload = load <16 x i8>, ptr %ptr
799 %v0 = extractelement <16 x i8> %vload, i32 0
800 %v1 = extractelement <16 x i8> %vload, i32 1
801 %v2 = extractelement <16 x i8> %vload, i32 2
802 %v3 = extractelement <16 x i8> %vload, i32 3
803 %v4 = extractelement <16 x i8> %vload, i32 4
804 %v5 = extractelement <16 x i8> %vload, i32 5
805 %v6 = extractelement <16 x i8> %vload, i32 6
806 %v7 = extractelement <16 x i8> %vload, i32 7
807 %v8 = extractelement <16 x i8> %vload, i32 8
808 %v9 = extractelement <16 x i8> %vload, i32 9
809 %v10 = extractelement <16 x i8> %vload, i32 10
810 %v11 = extractelement <16 x i8> %vload, i32 11
811 %v12 = extractelement <16 x i8> %vload, i32 12
812 %v13 = extractelement <16 x i8> %vload, i32 13
813 %v14 = extractelement <16 x i8> %vload, i32 14
814 %v15 = extractelement <16 x i8> %vload, i32 15
; Pairwise AND tree over the extracted lanes.
815 %vreduce01 = and i8 %v0, %v1
816 %vreduce23 = and i8 %v2, %v3
817 %vreduce45 = and i8 %v4, %v5
818 %vreduce67 = and i8 %v6, %v7
819 %vreduce89 = and i8 %v8, %v9
820 %vreduce1011 = and i8 %v10, %v11
821 %vreduce1213 = and i8 %v12, %v13
822 %vreduce1415 = and i8 %v14, %v15
823 %vreduce0123 = and i8 %vreduce01, %vreduce23
824 %vreduce4567 = and i8 %vreduce45, %vreduce67
825 %vreduce891011 = and i8 %vreduce89, %vreduce1011
826 %vreduce12131415 = and i8 %vreduce1213, %vreduce1415
827 %vreduce01234567 = and i8 %vreduce0123, %vreduce4567
828 %vreduce89101112131415 = and i8 %vreduce891011, %vreduce12131415
829 %vreduce = and i8 %vreduce01234567, %vreduce89101112131415
; All-ones test on the reduced value.
830 %vcheck = icmp eq i8 %vreduce, -1
834 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: