1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512
; All-lanes AND reduction over a sign-extended compare mask.
; fcmp ogt on <2 x double> gives <2 x i1>, which is sign-extended to <2 x i64>;
; lane 1 is shuffled down to lane 0, ANDed with the mask, and lane 0 is extracted.
; The SSE and AVX check lines expect MOVMSKPD plus a compare against 3, while the
; AVX512 check lines expect a shuffle/AND sequence on the vector itself.
7 define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
8 ; SSE-LABEL: test_v2f64_sext:
10 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
11 ; SSE-NEXT: movmskpd %xmm1, %ecx
12 ; SSE-NEXT: xorl %eax, %eax
13 ; SSE-NEXT: cmpl $3, %ecx
18 ; AVX-LABEL: test_v2f64_sext:
20 ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
21 ; AVX-NEXT: vmovmskpd %xmm0, %ecx
22 ; AVX-NEXT: xorl %eax, %eax
23 ; AVX-NEXT: cmpl $3, %ecx
28 ; AVX512-LABEL: test_v2f64_sext:
30 ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
31 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
32 ; AVX512-NEXT: vandpd %xmm1, %xmm0, %xmm0
33 ; AVX512-NEXT: vmovq %xmm0, %rax
35 %c = fcmp ogt <2 x double> %a0, %a1
36 %s = sext <2 x i1> %c to <2 x i64>
; Single reduction step: lane 1 is moved to lane 0 and ANDed with the mask.
37 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
38 %2 = and <2 x i64> %s, %1
39 %3 = extractelement <2 x i64> %2, i32 0
; All-lanes AND reduction of an fcmp ogt mask on <4 x double>, sign-extended to
; <4 x i64>. Two shuffle+AND steps fold lanes 2,3 onto 0,1 and then lane 1 onto
; lane 0 before lane 0 is extracted.
; The SSE check lines AND the two 128-bit compare results before MOVMSKPD (compare
; with 3); the AVX check lines use a 256-bit VMOVMSKPD (compare with 15); the
; AVX512 check lines keep an extract/shuffle/AND chain.
43 define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
44 ; SSE-LABEL: test_v4f64_sext:
46 ; SSE-NEXT: cmpltpd %xmm1, %xmm3
47 ; SSE-NEXT: cmpltpd %xmm0, %xmm2
48 ; SSE-NEXT: andpd %xmm3, %xmm2
49 ; SSE-NEXT: movmskpd %xmm2, %ecx
50 ; SSE-NEXT: xorl %eax, %eax
51 ; SSE-NEXT: cmpl $3, %ecx
56 ; AVX-LABEL: test_v4f64_sext:
58 ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
59 ; AVX-NEXT: vmovmskpd %ymm0, %ecx
60 ; AVX-NEXT: xorl %eax, %eax
61 ; AVX-NEXT: cmpl $15, %ecx
64 ; AVX-NEXT: vzeroupper
67 ; AVX512-LABEL: test_v4f64_sext:
69 ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
70 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
71 ; AVX512-NEXT: vandpd %xmm1, %xmm0, %xmm0
72 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
73 ; AVX512-NEXT: vandpd %xmm1, %xmm0, %xmm0
74 ; AVX512-NEXT: vmovq %xmm0, %rax
75 ; AVX512-NEXT: vzeroupper
77 %c = fcmp ogt <4 x double> %a0, %a1
78 %s = sext <4 x i1> %c to <4 x i64>
; Reduction steps: each shuffle brings the upper half of the live lanes down, then ANDs.
79 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
80 %2 = and <4 x i64> %s, %1
81 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
82 %4 = and <4 x i64> %2, %3
83 %5 = extractelement <4 x i64> %4, i64 0
; Variant of the <4 x double> reduction in which the compare mask is
; sign-extended to <4 x i32> instead of <4 x i64>. The AND reduction runs on the
; <4 x i32> vector and the extracted i32 is widened to i64 with a scalar sext.
; The SSE and AVX check lines pack the compare results with (V)PACKSSDW before
; MOVMSKPS (compare with 15); the AVX512 check lines compare into mask register
; k1 and then materialise a vector for the shuffle/AND chain.
87 define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
88 ; SSE-LABEL: test_v4f64_legal_sext:
90 ; SSE-NEXT: cmpltpd %xmm1, %xmm3
91 ; SSE-NEXT: cmpltpd %xmm0, %xmm2
92 ; SSE-NEXT: packssdw %xmm3, %xmm2
93 ; SSE-NEXT: movmskps %xmm2, %ecx
94 ; SSE-NEXT: xorl %eax, %eax
95 ; SSE-NEXT: cmpl $15, %ecx
100 ; AVX-LABEL: test_v4f64_legal_sext:
102 ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
103 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
104 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
105 ; AVX-NEXT: vmovmskps %xmm0, %ecx
106 ; AVX-NEXT: xorl %eax, %eax
107 ; AVX-NEXT: cmpl $15, %ecx
109 ; AVX-NEXT: negq %rax
110 ; AVX-NEXT: vzeroupper
113 ; AVX512-LABEL: test_v4f64_legal_sext:
115 ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
116 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
117 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
118 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
119 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
120 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
121 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
122 ; AVX512-NEXT: vmovd %xmm0, %eax
124 ; AVX512-NEXT: vzeroupper
126 %c = fcmp ogt <4 x double> %a0, %a1
127 %s = sext <4 x i1> %c to <4 x i32>
; Reduction steps on the narrower <4 x i32> mask.
128 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
129 %2 = and <4 x i32> %s, %1
130 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
131 %4 = and <4 x i32> %2, %3
132 %5 = extractelement <4 x i32> %4, i64 0
; Widen the reduced i32 to the i64 type named in the function signature.
133 %6 = sext i32 %5 to i64
; All-lanes AND reduction of an fcmp ogt mask on <4 x float>, sign-extended to
; <4 x i32>. Two shuffle+AND steps fold the vector into lane 0, which is extracted.
; The SSE and AVX check lines expect MOVMSKPS plus a compare against 15; the
; AVX512 check lines expect a VPERMILPS/VANDPS chain.
137 define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
138 ; SSE-LABEL: test_v4f32_sext:
140 ; SSE-NEXT: cmpltps %xmm0, %xmm1
141 ; SSE-NEXT: movmskps %xmm1, %ecx
142 ; SSE-NEXT: xorl %eax, %eax
143 ; SSE-NEXT: cmpl $15, %ecx
145 ; SSE-NEXT: negl %eax
148 ; AVX-LABEL: test_v4f32_sext:
150 ; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
151 ; AVX-NEXT: vmovmskps %xmm0, %ecx
152 ; AVX-NEXT: xorl %eax, %eax
153 ; AVX-NEXT: cmpl $15, %ecx
155 ; AVX-NEXT: negl %eax
158 ; AVX512-LABEL: test_v4f32_sext:
160 ; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
161 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
162 ; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
163 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
164 ; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
165 ; AVX512-NEXT: vmovd %xmm0, %eax
167 %c = fcmp ogt <4 x float> %a0, %a1
168 %s = sext <4 x i1> %c to <4 x i32>
; Reduction steps: lanes 2,3 onto 0,1, then lane 1 onto lane 0.
169 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
170 %2 = and <4 x i32> %s, %1
171 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
172 %4 = and <4 x i32> %2, %3
173 %5 = extractelement <4 x i32> %4, i32 0
; All-lanes AND reduction of an fcmp ogt mask on <8 x float>, sign-extended to
; <8 x i32>. Three shuffle+AND steps (4, 2, then 1 lanes) fold the vector into
; lane 0, which is extracted.
; The SSE check lines AND the two 128-bit halves before MOVMSKPS (compare with
; 15); the AVX check lines use a 256-bit VMOVMSKPS (compare with 255); the
; AVX512 check lines keep an extract/shuffle/AND chain.
177 define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
178 ; SSE-LABEL: test_v8f32_sext:
180 ; SSE-NEXT: cmpltps %xmm1, %xmm3
181 ; SSE-NEXT: cmpltps %xmm0, %xmm2
182 ; SSE-NEXT: andps %xmm3, %xmm2
183 ; SSE-NEXT: movmskps %xmm2, %ecx
184 ; SSE-NEXT: xorl %eax, %eax
185 ; SSE-NEXT: cmpl $15, %ecx
187 ; SSE-NEXT: negl %eax
190 ; AVX-LABEL: test_v8f32_sext:
192 ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
193 ; AVX-NEXT: vmovmskps %ymm0, %ecx
194 ; AVX-NEXT: xorl %eax, %eax
195 ; AVX-NEXT: cmpl $255, %ecx
197 ; AVX-NEXT: negl %eax
198 ; AVX-NEXT: vzeroupper
201 ; AVX512-LABEL: test_v8f32_sext:
203 ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
204 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
205 ; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
206 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
207 ; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
208 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
209 ; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
210 ; AVX512-NEXT: vmovd %xmm0, %eax
211 ; AVX512-NEXT: vzeroupper
213 %c = fcmp ogt <8 x float> %a0, %a1
214 %s = sext <8 x i1> %c to <8 x i32>
; Reduction steps: each shuffle brings the upper half of the live lanes down, then ANDs.
215 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
216 %2 = and <8 x i32> %s, %1
217 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
218 %4 = and <8 x i32> %2, %3
219 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
220 %6 = and <8 x i32> %4, %5
221 %7 = extractelement <8 x i32> %6, i32 0
; Variant of the <8 x float> reduction in which the compare mask is
; sign-extended to <8 x i16> instead of <8 x i32>. The AND reduction runs on the
; <8 x i16> vector and the extracted i16 is widened to i32 with a scalar sext.
; The SSE and AVX check lines pack with (V)PACKSSDW and then use (V)PMOVMSKB with
; a compare against 0xFFFF; the AVX512 check lines compare into mask register k0
; and expand it with VPMOVM2W before the shuffle/AND chain.
225 define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
226 ; SSE-LABEL: test_v8f32_legal_sext:
228 ; SSE-NEXT: cmpltps %xmm1, %xmm3
229 ; SSE-NEXT: cmpltps %xmm0, %xmm2
230 ; SSE-NEXT: packssdw %xmm3, %xmm2
231 ; SSE-NEXT: pmovmskb %xmm2, %ecx
232 ; SSE-NEXT: xorl %eax, %eax
233 ; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
235 ; SSE-NEXT: negl %eax
238 ; AVX-LABEL: test_v8f32_legal_sext:
240 ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
241 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
242 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
243 ; AVX-NEXT: vpmovmskb %xmm0, %ecx
244 ; AVX-NEXT: xorl %eax, %eax
245 ; AVX-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
247 ; AVX-NEXT: negl %eax
248 ; AVX-NEXT: vzeroupper
251 ; AVX512-LABEL: test_v8f32_legal_sext:
253 ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0
254 ; AVX512-NEXT: vpmovm2w %k0, %xmm0
255 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
256 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
257 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
258 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
259 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
260 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
261 ; AVX512-NEXT: vmovd %xmm0, %eax
263 ; AVX512-NEXT: vzeroupper
265 %c = fcmp ogt <8 x float> %a0, %a1
266 %s = sext <8 x i1> %c to <8 x i16>
; Reduction steps on the narrower <8 x i16> mask (4, 2, then 1 lanes).
267 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
268 %2 = and <8 x i16> %s, %1
269 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
270 %4 = and <8 x i16> %2, %3
271 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
272 %6 = and <8 x i16> %4, %5
273 %7 = extractelement <8 x i16> %6, i32 0
; Widen the reduced i16 to the i32 type named in the function signature.
274 %8 = sext i16 %7 to i32
; Integer counterpart of the two-lane reduction: icmp sgt on <2 x i64>, sext of
; the <2 x i1> result to <2 x i64>, one shuffle+AND step, then lane 0 is extracted.
; The SSE and AVX check lines expect (V)PCMPGTQ followed by MOVMSKPD and a compare
; against 3; the AVX512 check lines expect VPSHUFD/VPAND on the vector.
278 define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
279 ; SSE-LABEL: test_v2i64_sext:
281 ; SSE-NEXT: pcmpgtq %xmm1, %xmm0
282 ; SSE-NEXT: movmskpd %xmm0, %ecx
283 ; SSE-NEXT: xorl %eax, %eax
284 ; SSE-NEXT: cmpl $3, %ecx
286 ; SSE-NEXT: negq %rax
289 ; AVX-LABEL: test_v2i64_sext:
291 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
292 ; AVX-NEXT: vmovmskpd %xmm0, %ecx
293 ; AVX-NEXT: xorl %eax, %eax
294 ; AVX-NEXT: cmpl $3, %ecx
296 ; AVX-NEXT: negq %rax
299 ; AVX512-LABEL: test_v2i64_sext:
301 ; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
302 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
303 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
304 ; AVX512-NEXT: vmovq %xmm0, %rax
306 %c = icmp sgt <2 x i64> %a0, %a1
307 %s = sext <2 x i1> %c to <2 x i64>
; Single reduction step: lane 1 is moved to lane 0 and ANDed with the mask.
308 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
309 %2 = and <2 x i64> %s, %1
310 %3 = extractelement <2 x i64> %2, i32 0
; All-lanes AND reduction of an icmp sgt mask on <4 x i64>, sign-extended to
; <4 x i64>. Two shuffle+AND steps fold the vector into lane 0, which is extracted.
; This function has separate AVX1 and AVX2 check lines: the AVX1 sequence compares
; the two 128-bit halves separately and rejoins them with VINSERTF128, while the
; AVX2 sequence uses a single 256-bit VPCMPGTQ. Both then use VMOVMSKPD with a
; compare against 15. The AVX512 check lines keep an extract/shuffle/AND chain.
314 define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
315 ; SSE-LABEL: test_v4i64_sext:
317 ; SSE-NEXT: pcmpgtq %xmm3, %xmm1
318 ; SSE-NEXT: pcmpgtq %xmm2, %xmm0
319 ; SSE-NEXT: pand %xmm1, %xmm0
320 ; SSE-NEXT: movmskpd %xmm0, %ecx
321 ; SSE-NEXT: xorl %eax, %eax
322 ; SSE-NEXT: cmpl $3, %ecx
324 ; SSE-NEXT: negq %rax
327 ; AVX1-LABEL: test_v4i64_sext:
329 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
330 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
331 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
332 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
333 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
334 ; AVX1-NEXT: vmovmskpd %ymm0, %ecx
335 ; AVX1-NEXT: xorl %eax, %eax
336 ; AVX1-NEXT: cmpl $15, %ecx
337 ; AVX1-NEXT: sete %al
338 ; AVX1-NEXT: negq %rax
339 ; AVX1-NEXT: vzeroupper
342 ; AVX2-LABEL: test_v4i64_sext:
344 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
345 ; AVX2-NEXT: vmovmskpd %ymm0, %ecx
346 ; AVX2-NEXT: xorl %eax, %eax
347 ; AVX2-NEXT: cmpl $15, %ecx
348 ; AVX2-NEXT: sete %al
349 ; AVX2-NEXT: negq %rax
350 ; AVX2-NEXT: vzeroupper
353 ; AVX512-LABEL: test_v4i64_sext:
355 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
356 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
357 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
358 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
359 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
360 ; AVX512-NEXT: vmovq %xmm0, %rax
361 ; AVX512-NEXT: vzeroupper
363 %c = icmp sgt <4 x i64> %a0, %a1
364 %s = sext <4 x i1> %c to <4 x i64>
; Reduction steps: lanes 2,3 onto 0,1, then lane 1 onto lane 0.
365 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
366 %2 = and <4 x i64> %s, %1
367 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
368 %4 = and <4 x i64> %2, %3
369 %5 = extractelement <4 x i64> %4, i64 0
; Variant of the <4 x i64> reduction in which the icmp sgt mask is sign-extended
; to <4 x i32> instead of <4 x i64>. The AND reduction runs on the <4 x i32>
; vector and the extracted i32 is widened to i64 with a scalar sext.
; The SSE, AVX1 and AVX2 check lines pack the compare results with (V)PACKSSDW and
; then use MOVMSKPS with a compare against 15; the AVX512 check lines compare
; into mask register k1 before the shuffle/AND chain.
373 define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
374 ; SSE-LABEL: test_v4i64_legal_sext:
376 ; SSE-NEXT: pcmpgtq %xmm3, %xmm1
377 ; SSE-NEXT: pcmpgtq %xmm2, %xmm0
378 ; SSE-NEXT: packssdw %xmm1, %xmm0
379 ; SSE-NEXT: movmskps %xmm0, %ecx
380 ; SSE-NEXT: xorl %eax, %eax
381 ; SSE-NEXT: cmpl $15, %ecx
383 ; SSE-NEXT: negq %rax
386 ; AVX1-LABEL: test_v4i64_legal_sext:
388 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
389 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
390 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
391 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
392 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
393 ; AVX1-NEXT: vmovmskps %xmm0, %ecx
394 ; AVX1-NEXT: xorl %eax, %eax
395 ; AVX1-NEXT: cmpl $15, %ecx
396 ; AVX1-NEXT: sete %al
397 ; AVX1-NEXT: negq %rax
398 ; AVX1-NEXT: vzeroupper
401 ; AVX2-LABEL: test_v4i64_legal_sext:
403 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
404 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
405 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
406 ; AVX2-NEXT: vmovmskps %xmm0, %ecx
407 ; AVX2-NEXT: xorl %eax, %eax
408 ; AVX2-NEXT: cmpl $15, %ecx
409 ; AVX2-NEXT: sete %al
410 ; AVX2-NEXT: negq %rax
411 ; AVX2-NEXT: vzeroupper
414 ; AVX512-LABEL: test_v4i64_legal_sext:
416 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
417 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
418 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
419 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
420 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
421 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
422 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
423 ; AVX512-NEXT: vmovd %xmm0, %eax
425 ; AVX512-NEXT: vzeroupper
427 %c = icmp sgt <4 x i64> %a0, %a1
428 %s = sext <4 x i1> %c to <4 x i32>
; Reduction steps on the narrower <4 x i32> mask.
429 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
430 %2 = and <4 x i32> %s, %1
431 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
432 %4 = and <4 x i32> %2, %3
433 %5 = extractelement <4 x i32> %4, i64 0
; Widen the reduced i32 to the i64 type named in the function signature.
434 %6 = sext i32 %5 to i64
; All-lanes AND reduction of an icmp sgt mask on <4 x i32>, sign-extended to
; <4 x i32>. Two shuffle+AND steps fold the vector into lane 0, which is extracted.
; The SSE and AVX check lines expect (V)PCMPGTD followed by MOVMSKPS and a compare
; against 15; the AVX512 check lines expect a VPSHUFD/VPAND chain.
438 define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
439 ; SSE-LABEL: test_v4i32_sext:
441 ; SSE-NEXT: pcmpgtd %xmm1, %xmm0
442 ; SSE-NEXT: movmskps %xmm0, %ecx
443 ; SSE-NEXT: xorl %eax, %eax
444 ; SSE-NEXT: cmpl $15, %ecx
446 ; SSE-NEXT: negl %eax
449 ; AVX-LABEL: test_v4i32_sext:
451 ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
452 ; AVX-NEXT: vmovmskps %xmm0, %ecx
453 ; AVX-NEXT: xorl %eax, %eax
454 ; AVX-NEXT: cmpl $15, %ecx
456 ; AVX-NEXT: negl %eax
459 ; AVX512-LABEL: test_v4i32_sext:
461 ; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
462 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
463 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
464 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
465 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
466 ; AVX512-NEXT: vmovd %xmm0, %eax
468 %c = icmp sgt <4 x i32> %a0, %a1
469 %s = sext <4 x i1> %c to <4 x i32>
; Reduction steps: lanes 2,3 onto 0,1, then lane 1 onto lane 0.
470 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
471 %2 = and <4 x i32> %s, %1
472 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
473 %4 = and <4 x i32> %2, %3
474 %5 = extractelement <4 x i32> %4, i32 0
; All-lanes AND reduction of an icmp sgt mask on <8 x i32>, sign-extended to
; <8 x i32>. Three shuffle+AND steps (4, 2, then 1 lanes) fold the vector into
; lane 0, which is extracted.
; The AVX1 check lines compare the 128-bit halves separately and rejoin them with
; VINSERTF128, while the AVX2 check lines use one 256-bit VPCMPGTD; both then use
; VMOVMSKPS with a compare against 255. The SSE check lines AND the two halves
; first (compare with 15). The AVX512 check lines keep an extract/shuffle/AND chain.
478 define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
479 ; SSE-LABEL: test_v8i32_sext:
481 ; SSE-NEXT: pcmpgtd %xmm3, %xmm1
482 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0
483 ; SSE-NEXT: pand %xmm1, %xmm0
484 ; SSE-NEXT: movmskps %xmm0, %ecx
485 ; SSE-NEXT: xorl %eax, %eax
486 ; SSE-NEXT: cmpl $15, %ecx
488 ; SSE-NEXT: negl %eax
491 ; AVX1-LABEL: test_v8i32_sext:
493 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
494 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
495 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
496 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
497 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
498 ; AVX1-NEXT: vmovmskps %ymm0, %ecx
499 ; AVX1-NEXT: xorl %eax, %eax
500 ; AVX1-NEXT: cmpl $255, %ecx
501 ; AVX1-NEXT: sete %al
502 ; AVX1-NEXT: negl %eax
503 ; AVX1-NEXT: vzeroupper
506 ; AVX2-LABEL: test_v8i32_sext:
508 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
509 ; AVX2-NEXT: vmovmskps %ymm0, %ecx
510 ; AVX2-NEXT: xorl %eax, %eax
511 ; AVX2-NEXT: cmpl $255, %ecx
512 ; AVX2-NEXT: sete %al
513 ; AVX2-NEXT: negl %eax
514 ; AVX2-NEXT: vzeroupper
517 ; AVX512-LABEL: test_v8i32_sext:
519 ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
520 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
521 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
522 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
523 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
524 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
525 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
526 ; AVX512-NEXT: vmovd %xmm0, %eax
527 ; AVX512-NEXT: vzeroupper
529 %c = icmp sgt <8 x i32> %a0, %a1
530 %s = sext <8 x i1> %c to <8 x i32>
; Reduction steps: each shuffle brings the upper half of the live lanes down, then ANDs.
531 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
532 %2 = and <8 x i32> %s, %1
533 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
534 %4 = and <8 x i32> %2, %3
535 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
536 %6 = and <8 x i32> %4, %5
537 %7 = extractelement <8 x i32> %6, i32 0
; Variant of the <8 x i32> reduction in which the icmp sgt mask is sign-extended
; to <8 x i16> instead of <8 x i32>. The AND reduction runs on the <8 x i16>
; vector and the extracted i16 is widened to i32 with a scalar sext.
; The SSE, AVX1 and AVX2 check lines pack the compare results with (V)PACKSSDW and
; then use (V)PMOVMSKB with a compare against 0xFFFF; the AVX512 check lines
; compare into mask register k0 and expand it with VPMOVM2W.
541 define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
542 ; SSE-LABEL: test_v8i32_legal_sext:
544 ; SSE-NEXT: pcmpgtd %xmm3, %xmm1
545 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0
546 ; SSE-NEXT: packssdw %xmm1, %xmm0
547 ; SSE-NEXT: pmovmskb %xmm0, %ecx
548 ; SSE-NEXT: xorl %eax, %eax
549 ; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
551 ; SSE-NEXT: negl %eax
554 ; AVX1-LABEL: test_v8i32_legal_sext:
556 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
557 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
558 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
559 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
560 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
561 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
562 ; AVX1-NEXT: xorl %eax, %eax
563 ; AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
564 ; AVX1-NEXT: sete %al
565 ; AVX1-NEXT: negl %eax
566 ; AVX1-NEXT: vzeroupper
569 ; AVX2-LABEL: test_v8i32_legal_sext:
571 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
572 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
573 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
574 ; AVX2-NEXT: vpmovmskb %xmm0, %ecx
575 ; AVX2-NEXT: xorl %eax, %eax
576 ; AVX2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
577 ; AVX2-NEXT: sete %al
578 ; AVX2-NEXT: negl %eax
579 ; AVX2-NEXT: vzeroupper
582 ; AVX512-LABEL: test_v8i32_legal_sext:
584 ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
585 ; AVX512-NEXT: vpmovm2w %k0, %xmm0
586 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
587 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
588 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
589 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
590 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
591 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
592 ; AVX512-NEXT: vmovd %xmm0, %eax
594 ; AVX512-NEXT: vzeroupper
596 %c = icmp sgt <8 x i32> %a0, %a1
597 %s = sext <8 x i1> %c to <8 x i16>
; Reduction steps on the narrower <8 x i16> mask (4, 2, then 1 lanes).
598 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
599 %2 = and <8 x i16> %s, %1
600 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
601 %4 = and <8 x i16> %2, %3
602 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
603 %6 = and <8 x i16> %4, %5
604 %7 = extractelement <8 x i16> %6, i32 0
; Widen the reduced i16 to the i32 type named in the function signature.
605 %8 = sext i16 %7 to i32
; All-lanes AND reduction of an icmp sgt mask on <8 x i16>, sign-extended to
; <8 x i16>. Three shuffle+AND steps (4, 2, then 1 lanes) fold the vector into
; lane 0, which is extracted.
; The SSE and AVX check lines expect (V)PCMPGTW followed by (V)PMOVMSKB and a
; compare against 0xFFFF; the AVX512 check lines expect a VPSHUFD/VPSRLD/VPAND chain.
609 define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
610 ; SSE-LABEL: test_v8i16_sext:
612 ; SSE-NEXT: pcmpgtw %xmm1, %xmm0
613 ; SSE-NEXT: pmovmskb %xmm0, %ecx
614 ; SSE-NEXT: xorl %eax, %eax
615 ; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
617 ; SSE-NEXT: negl %eax
618 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
621 ; AVX-LABEL: test_v8i16_sext:
623 ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
624 ; AVX-NEXT: vpmovmskb %xmm0, %ecx
625 ; AVX-NEXT: xorl %eax, %eax
626 ; AVX-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
628 ; AVX-NEXT: negl %eax
629 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
632 ; AVX512-LABEL: test_v8i16_sext:
634 ; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
635 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
636 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
637 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
638 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
639 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
640 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
641 ; AVX512-NEXT: vmovd %xmm0, %eax
642 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
644 %c = icmp sgt <8 x i16> %a0, %a1
645 %s = sext <8 x i1> %c to <8 x i16>
; Reduction steps: each shuffle brings the upper half of the live lanes down, then ANDs.
646 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
647 %2 = and <8 x i16> %s, %1
648 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
649 %4 = and <8 x i16> %2, %3
650 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
651 %6 = and <8 x i16> %4, %5
652 %7 = extractelement <8 x i16> %6, i32 0
; All-lanes AND reduction of an icmp sgt mask on <16 x i16>, sign-extended to
; <16 x i16>. Four shuffle+AND steps (8, 4, 2, then 1 lanes) fold the vector
; into lane 0, which is extracted.
; The SSE and AVX1 check lines AND the two 128-bit compare results and use
; (V)PMOVMSKB with a compare against 0xFFFF; the AVX2 check lines use a 256-bit
; VPMOVMSKB with a compare against -1; the AVX512 check lines keep an
; extract/shuffle/shift/AND chain.
656 define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
657 ; SSE-LABEL: test_v16i16_sext:
659 ; SSE-NEXT: pcmpgtw %xmm3, %xmm1
660 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0
661 ; SSE-NEXT: pand %xmm1, %xmm0
662 ; SSE-NEXT: pmovmskb %xmm0, %ecx
663 ; SSE-NEXT: xorl %eax, %eax
664 ; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
666 ; SSE-NEXT: negl %eax
667 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
670 ; AVX1-LABEL: test_v16i16_sext:
672 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
673 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
674 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
675 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
676 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
677 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
678 ; AVX1-NEXT: xorl %eax, %eax
679 ; AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
680 ; AVX1-NEXT: sete %al
681 ; AVX1-NEXT: negl %eax
682 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
683 ; AVX1-NEXT: vzeroupper
686 ; AVX2-LABEL: test_v16i16_sext:
688 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
689 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
690 ; AVX2-NEXT: xorl %eax, %eax
691 ; AVX2-NEXT: cmpl $-1, %ecx
692 ; AVX2-NEXT: sete %al
693 ; AVX2-NEXT: negl %eax
694 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
695 ; AVX2-NEXT: vzeroupper
698 ; AVX512-LABEL: test_v16i16_sext:
700 ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
701 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
702 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
703 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
704 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
705 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
706 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
707 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
708 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
709 ; AVX512-NEXT: vmovd %xmm0, %eax
710 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
711 ; AVX512-NEXT: vzeroupper
713 %c = icmp sgt <16 x i16> %a0, %a1
714 %s = sext <16 x i1> %c to <16 x i16>
; Reduction steps: each shuffle brings the upper half of the live lanes down, then ANDs.
715 %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
716 %2 = and <16 x i16> %s, %1
717 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
718 %4 = and <16 x i16> %2, %3
719 %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
720 %6 = and <16 x i16> %4, %5
721 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
722 %8 = and <16 x i16> %6, %7
723 %9 = extractelement <16 x i16> %8, i32 0
; Variant of the <16 x i16> reduction in which the icmp sgt mask is sign-extended
; to <16 x i8> instead of <16 x i16>. The AND reduction runs on the <16 x i8>
; vector and the extracted i8 is widened to i16 with a scalar sext.
; The SSE, AVX1 and AVX2 check lines pack the compare results with (V)PACKSSWB and
; then use (V)PMOVMSKB with a compare against 0xFFFF, followed by MOVSBL; the
; AVX512 check lines compare into mask register k0 and expand it with VPMOVM2B.
727 define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
728 ; SSE-LABEL: test_v16i16_legal_sext:
730 ; SSE-NEXT: pcmpgtw %xmm3, %xmm1
731 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0
732 ; SSE-NEXT: packsswb %xmm1, %xmm0
733 ; SSE-NEXT: pmovmskb %xmm0, %eax
734 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
737 ; SSE-NEXT: movsbl %al, %eax
738 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
741 ; AVX1-LABEL: test_v16i16_legal_sext:
743 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
744 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
745 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
746 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
747 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
748 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
749 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
750 ; AVX1-NEXT: sete %al
751 ; AVX1-NEXT: negb %al
752 ; AVX1-NEXT: movsbl %al, %eax
753 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
754 ; AVX1-NEXT: vzeroupper
757 ; AVX2-LABEL: test_v16i16_legal_sext:
759 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
760 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
761 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
762 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
763 ; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
764 ; AVX2-NEXT: sete %al
765 ; AVX2-NEXT: negb %al
766 ; AVX2-NEXT: movsbl %al, %eax
767 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
768 ; AVX2-NEXT: vzeroupper
771 ; AVX512-LABEL: test_v16i16_legal_sext:
773 ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
774 ; AVX512-NEXT: vpmovm2b %k0, %xmm0
775 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
776 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
777 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
778 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
779 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
780 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
781 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
782 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
783 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax
784 ; AVX512-NEXT: movsbl %al, %eax
785 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
786 ; AVX512-NEXT: vzeroupper
788 %c = icmp sgt <16 x i16> %a0, %a1
789 %s = sext <16 x i1> %c to <16 x i8>
; Reduction steps on the narrower <16 x i8> mask (8, 4, 2, then 1 lanes).
790 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
791 %2 = and <16 x i8> %s, %1
792 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
793 %4 = and <16 x i8> %2, %3
794 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
795 %6 = and <16 x i8> %4, %5
796 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
797 %8 = and <16 x i8> %6, %7
798 %9 = extractelement <16 x i8> %8, i32 0
; Widen the reduced i8 to the i16 type named in the function signature.
799 %10 = sext i8 %9 to i16
; All-lanes AND reduction of an icmp sgt mask on <16 x i8>, sign-extended to
; <16 x i8>. Four shuffle+AND steps (8, 4, 2, then 1 lanes) fold the vector into
; lane 0, which is extracted.
; The SSE and AVX check lines expect (V)PCMPGTB followed by (V)PMOVMSKB and a
; compare against 0xFFFF; the AVX512 check lines expect a shuffle/shift/AND chain
; ending in VPEXTRB.
803 define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
804 ; SSE-LABEL: test_v16i8_sext:
806 ; SSE-NEXT: pcmpgtb %xmm1, %xmm0
807 ; SSE-NEXT: pmovmskb %xmm0, %eax
808 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
813 ; AVX-LABEL: test_v16i8_sext:
815 ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
816 ; AVX-NEXT: vpmovmskb %xmm0, %eax
817 ; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
822 ; AVX512-LABEL: test_v16i8_sext:
824 ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
825 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
826 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
827 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
828 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
829 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
830 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
831 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
832 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
833 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax
834 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
836 %c = icmp sgt <16 x i8> %a0, %a1
837 %s = sext <16 x i1> %c to <16 x i8>
; Reduction steps: each shuffle brings the upper half of the live lanes down, then ANDs.
838 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
839 %2 = and <16 x i8> %s, %1
840 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
841 %4 = and <16 x i8> %2, %3
842 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
843 %6 = and <16 x i8> %4, %5
844 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
845 %8 = and <16 x i8> %6, %7
846 %9 = extractelement <16 x i8> %8, i32 0
850 define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
851 ; SSE-LABEL: test_v32i8_sext:
853 ; SSE-NEXT: pcmpgtb %xmm3, %xmm1
854 ; SSE-NEXT: pcmpgtb %xmm2, %xmm0
855 ; SSE-NEXT: pand %xmm1, %xmm0
856 ; SSE-NEXT: pmovmskb %xmm0, %eax
857 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
862 ; AVX1-LABEL: test_v32i8_sext:
864 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
865 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
866 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
867 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
868 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
869 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
870 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
871 ; AVX1-NEXT: sete %al
872 ; AVX1-NEXT: negb %al
873 ; AVX1-NEXT: vzeroupper
876 ; AVX2-LABEL: test_v32i8_sext:
878 ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
879 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
880 ; AVX2-NEXT: cmpl $-1, %eax
881 ; AVX2-NEXT: sete %al
882 ; AVX2-NEXT: negb %al
883 ; AVX2-NEXT: vzeroupper
886 ; AVX512-LABEL: test_v32i8_sext:
888 ; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
889 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
890 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
891 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
892 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
893 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
894 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
895 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
896 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
897 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
898 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
899 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax
900 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
901 ; AVX512-NEXT: vzeroupper
903 %c = icmp sgt <32 x i8> %a0, %a1
904 %s = sext <32 x i1> %c to <32 x i8>
905 %1 = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
906 %2 = and <32 x i8> %s, %1
907 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
908 %4 = and <32 x i8> %2, %3
909 %5 = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
910 %6 = and <32 x i8> %4, %5
911 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
912 %8 = and <32 x i8> %6, %7
913 %9 = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
914 %10 = and <32 x i8> %8, %9
915 %11 = extractelement <32 x i8> %10, i32 0
; AND-reduction of a <2 x i1> mask produced by an ordered greater-than
; compare of two <2 x double> vectors. The checks below expect the SSE and AVX
; runs to use a movmskpd-based test against 3, and the AVX512 run to compare
; into a mask register and test its low two bits.
919 define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) {
920 ; SSE-LABEL: bool_reduction_v2f64:
922 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
923 ; SSE-NEXT: movmskpd %xmm1, %eax
924 ; SSE-NEXT: cmpb $3, %al
928 ; AVX-LABEL: bool_reduction_v2f64:
930 ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
931 ; AVX-NEXT: vmovmskpd %xmm0, %eax
932 ; AVX-NEXT: cmpb $3, %al
936 ; AVX512-LABEL: bool_reduction_v2f64:
938 ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0
939 ; AVX512-NEXT: kmovd %k0, %eax
940 ; AVX512-NEXT: andb $3, %al
941 ; AVX512-NEXT: cmpb $3, %al
942 ; AVX512-NEXT: sete %al
; Single reduction step: lane 1 is shuffled down to lane 0 and ANDed with the
; original mask, so lane 0 of %c is (lane0 & lane1) of the compare result.
944 %a = fcmp ogt <2 x double> %x, %y
945 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
946 %c = and <2 x i1> %a, %b
947 %d = extractelement <2 x i1> %c, i32 0
; AND-reduction of a <4 x i1> mask produced by an ordered-equal compare of two
; <4 x float> vectors. The checks below expect a movmskps-based test against 15
; for the SSE and AVX runs, and a mask-register compare for the AVX512 run.
951 define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) {
952 ; SSE-LABEL: bool_reduction_v4f32:
954 ; SSE-NEXT: cmpeqps %xmm1, %xmm0
955 ; SSE-NEXT: movmskps %xmm0, %eax
956 ; SSE-NEXT: cmpb $15, %al
960 ; AVX-LABEL: bool_reduction_v4f32:
962 ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
963 ; AVX-NEXT: vmovmskps %xmm0, %eax
964 ; AVX-NEXT: cmpb $15, %al
968 ; AVX512-LABEL: bool_reduction_v4f32:
970 ; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0
971 ; AVX512-NEXT: kmovd %k0, %eax
972 ; AVX512-NEXT: andb $15, %al
973 ; AVX512-NEXT: cmpb $15, %al
974 ; AVX512-NEXT: sete %al
; Two halving steps: lanes 2,3 are folded onto lanes 0,1, then lane 1 onto
; lane 0, so lane 0 of %c is the AND of all four compare results.
976 %a = fcmp oeq <4 x float> %x, %y
977 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
978 %b = and <4 x i1> %s1, %a
979 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
980 %c = and <4 x i1> %s2, %b
981 %d = extractelement <4 x i1> %c, i32 0
; AND-reduction of a <4 x i1> mask produced by an ordered greater-or-equal
; compare of two <4 x double> vectors. In the checks below the SSE run compares
; the two 128-bit halves separately and packs them before movmskps; the AVX run
; uses a single 256-bit compare with vmovmskpd.
985 define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) {
986 ; SSE-LABEL: bool_reduction_v4f64:
988 ; SSE-NEXT: cmplepd %xmm1, %xmm3
989 ; SSE-NEXT: cmplepd %xmm0, %xmm2
990 ; SSE-NEXT: packssdw %xmm3, %xmm2
991 ; SSE-NEXT: movmskps %xmm2, %eax
992 ; SSE-NEXT: cmpb $15, %al
996 ; AVX-LABEL: bool_reduction_v4f64:
998 ; AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
999 ; AVX-NEXT: vmovmskpd %ymm0, %eax
1000 ; AVX-NEXT: cmpb $15, %al
1001 ; AVX-NEXT: sete %al
1002 ; AVX-NEXT: vzeroupper
1005 ; AVX512-LABEL: bool_reduction_v4f64:
1007 ; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k0
1008 ; AVX512-NEXT: kmovd %k0, %eax
1009 ; AVX512-NEXT: andb $15, %al
1010 ; AVX512-NEXT: cmpb $15, %al
1011 ; AVX512-NEXT: sete %al
1012 ; AVX512-NEXT: vzeroupper
; Two halving steps: lanes 2,3 are folded onto lanes 0,1, then lane 1 onto
; lane 0, so lane 0 of %c is the AND of all four compare results.
1014 %a = fcmp oge <4 x double> %x, %y
1015 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1016 %b = and <4 x i1> %s1, %a
1017 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1018 %c = and <4 x i1> %s2, %b
1019 %d = extractelement <4 x i1> %c, i32 0
; AND-reduction of an <8 x i1> mask produced by an unordered-or-not-equal
; compare of two <8 x float> vectors. In the checks below the SSE run packs the
; two 128-bit compare results down to bytes before pmovmskb; the AVX run uses
; vmovmskps on the 256-bit result. All runs test the low byte against -1.
1023 define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) {
1024 ; SSE-LABEL: bool_reduction_v8f32:
1026 ; SSE-NEXT: cmpneqps %xmm3, %xmm1
1027 ; SSE-NEXT: cmpneqps %xmm2, %xmm0
1028 ; SSE-NEXT: packssdw %xmm1, %xmm0
1029 ; SSE-NEXT: packsswb %xmm0, %xmm0
1030 ; SSE-NEXT: pmovmskb %xmm0, %eax
1031 ; SSE-NEXT: cmpb $-1, %al
1032 ; SSE-NEXT: sete %al
1035 ; AVX-LABEL: bool_reduction_v8f32:
1037 ; AVX-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0
1038 ; AVX-NEXT: vmovmskps %ymm0, %eax
1039 ; AVX-NEXT: cmpb $-1, %al
1040 ; AVX-NEXT: sete %al
1041 ; AVX-NEXT: vzeroupper
1044 ; AVX512-LABEL: bool_reduction_v8f32:
1046 ; AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k0
1047 ; AVX512-NEXT: kmovd %k0, %eax
1048 ; AVX512-NEXT: cmpb $-1, %al
1049 ; AVX512-NEXT: sete %al
1050 ; AVX512-NEXT: vzeroupper
; Three halving steps (4..7 -> 0..3, 2,3 -> 0,1, 1 -> 0), so lane 0 of %d is
; the AND of all eight compare results.
1052 %a = fcmp une <8 x float> %x, %y
1053 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1054 %b = and <8 x i1> %s1, %a
1055 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1056 %c = and <8 x i1> %s2, %b
1057 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1058 %d = and <8 x i1> %s3, %c
1059 %e = extractelement <8 x i1> %d, i32 0
; AND-reduction of a <2 x i1> mask produced by an unsigned greater-than compare
; of two <2 x i64> vectors. In the checks below the SSE and AVX runs XOR both
; operands with 9223372036854775808 (2^63, the sign bit) and then use the
; signed pcmpgtq; the AVX512 run uses vpcmpnleuq directly.
1063 define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
1064 ; SSE-LABEL: bool_reduction_v2i64:
1066 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1067 ; SSE-NEXT: pxor %xmm2, %xmm1
1068 ; SSE-NEXT: pxor %xmm2, %xmm0
1069 ; SSE-NEXT: pcmpgtq %xmm1, %xmm0
1070 ; SSE-NEXT: movmskpd %xmm0, %eax
1071 ; SSE-NEXT: cmpb $3, %al
1072 ; SSE-NEXT: sete %al
1075 ; AVX-LABEL: bool_reduction_v2i64:
1077 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1078 ; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
1079 ; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
1080 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
1081 ; AVX-NEXT: vmovmskpd %xmm0, %eax
1082 ; AVX-NEXT: cmpb $3, %al
1083 ; AVX-NEXT: sete %al
1086 ; AVX512-LABEL: bool_reduction_v2i64:
1088 ; AVX512-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0
1089 ; AVX512-NEXT: kmovd %k0, %eax
1090 ; AVX512-NEXT: andb $3, %al
1091 ; AVX512-NEXT: cmpb $3, %al
1092 ; AVX512-NEXT: sete %al
; Single reduction step: lane 1 is shuffled down to lane 0 and ANDed with the
; original mask, so lane 0 of %c is (lane0 & lane1) of the compare result.
1094 %a = icmp ugt <2 x i64> %x, %y
1095 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
1096 %c = and <2 x i1> %a, %b
1097 %d = extractelement <2 x i1> %c, i32 0
; AND-reduction of a <4 x i1> mask produced by a not-equal compare of two
; <4 x i32> vectors. In the checks below the SSE and AVX runs compute the
; equality mask with pcmpeqd and invert the four movmskps bits with xorl $15;
; the AVX512 run uses vpcmpneqd directly.
1101 define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
1102 ; SSE-LABEL: bool_reduction_v4i32:
1104 ; SSE-NEXT: pcmpeqd %xmm1, %xmm0
1105 ; SSE-NEXT: movmskps %xmm0, %eax
1106 ; SSE-NEXT: xorl $15, %eax
1107 ; SSE-NEXT: cmpb $15, %al
1108 ; SSE-NEXT: sete %al
1111 ; AVX-LABEL: bool_reduction_v4i32:
1113 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1114 ; AVX-NEXT: vmovmskps %xmm0, %eax
1115 ; AVX-NEXT: xorl $15, %eax
1116 ; AVX-NEXT: cmpb $15, %al
1117 ; AVX-NEXT: sete %al
1120 ; AVX512-LABEL: bool_reduction_v4i32:
1122 ; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k0
1123 ; AVX512-NEXT: kmovd %k0, %eax
1124 ; AVX512-NEXT: andb $15, %al
1125 ; AVX512-NEXT: cmpb $15, %al
1126 ; AVX512-NEXT: sete %al
; Two halving steps: lanes 2,3 are folded onto lanes 0,1, then lane 1 onto
; lane 0, so lane 0 of %c is the AND of all four compare results.
1128 %a = icmp ne <4 x i32> %x, %y
1129 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1130 %b = and <4 x i1> %s1, %a
1131 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1132 %c = and <4 x i1> %s2, %b
1133 %d = extractelement <4 x i1> %c, i32 0
; AND-reduction of an <8 x i1> mask produced by a signed less-than compare of
; two <8 x i16> vectors. In the checks below the SSE and AVX runs use pcmpgtw
; with swapped operands, pack the result to bytes, and test the low byte of
; pmovmskb against -1; the AVX512 run compares into a mask register.
1137 define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) {
1138 ; SSE-LABEL: bool_reduction_v8i16:
1140 ; SSE-NEXT: pcmpgtw %xmm0, %xmm1
1141 ; SSE-NEXT: packsswb %xmm0, %xmm1
1142 ; SSE-NEXT: pmovmskb %xmm1, %eax
1143 ; SSE-NEXT: cmpb $-1, %al
1144 ; SSE-NEXT: sete %al
1147 ; AVX-LABEL: bool_reduction_v8i16:
1149 ; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1150 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
1151 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1152 ; AVX-NEXT: cmpb $-1, %al
1153 ; AVX-NEXT: sete %al
1156 ; AVX512-LABEL: bool_reduction_v8i16:
1158 ; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k0
1159 ; AVX512-NEXT: kmovd %k0, %eax
1160 ; AVX512-NEXT: cmpb $-1, %al
1161 ; AVX512-NEXT: sete %al
; Three halving steps (4..7 -> 0..3, 2,3 -> 0,1, 1 -> 0), so lane 0 of %d is
; the AND of all eight compare results.
1163 %a = icmp slt <8 x i16> %x, %y
1164 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1165 %b = and <8 x i1> %s1, %a
1166 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1167 %c = and <8 x i1> %s2, %b
1168 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1169 %d = and <8 x i1> %s3, %c
1170 %e = extractelement <8 x i1> %d, i32 0
; AND-reduction of a <16 x i1> mask produced by a signed greater-than compare
; of two <16 x i8> vectors. In the checks below the SSE and AVX runs test the
; 16-bit pmovmskb result against -1; the AVX512 run uses kortestw followed by
; setb on the mask register.
1174 define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) {
1175 ; SSE-LABEL: bool_reduction_v16i8:
1177 ; SSE-NEXT: pcmpgtb %xmm1, %xmm0
1178 ; SSE-NEXT: pmovmskb %xmm0, %eax
1179 ; SSE-NEXT: cmpw $-1, %ax
1180 ; SSE-NEXT: sete %al
1183 ; AVX-LABEL: bool_reduction_v16i8:
1185 ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
1186 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1187 ; AVX-NEXT: cmpw $-1, %ax
1188 ; AVX-NEXT: sete %al
1191 ; AVX512-LABEL: bool_reduction_v16i8:
1193 ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
1194 ; AVX512-NEXT: kortestw %k0, %k0
1195 ; AVX512-NEXT: setb %al
; Four halving steps (8..15 -> 0..7, 4..7 -> 0..3, 2,3 -> 0,1, 1 -> 0), so
; lane 0 of %e is the AND of all sixteen compare results.
1197 %a = icmp sgt <16 x i8> %x, %y
1198 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1199 %b = and <16 x i1> %s1, %a
1200 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1201 %c = and <16 x i1> %s2, %b
1202 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1203 %d = and <16 x i1> %s3, %c
1204 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1205 %e = and <16 x i1> %s4, %d
1206 %f = extractelement <16 x i1> %e, i32 0
; AND-reduction of a <4 x i1> mask produced by a signed less-than compare of
; two <4 x i64> vectors. In the checks below the SSE and AVX1 runs compare the
; two 128-bit halves separately (AVX1 re-joins them with vinsertf128), while
; the AVX2 and AVX512 runs use a single 256-bit vpcmpgtq with swapped operands.
1210 define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
1211 ; SSE-LABEL: bool_reduction_v4i64:
1213 ; SSE-NEXT: pcmpgtq %xmm1, %xmm3
1214 ; SSE-NEXT: pcmpgtq %xmm0, %xmm2
1215 ; SSE-NEXT: packssdw %xmm3, %xmm2
1216 ; SSE-NEXT: movmskps %xmm2, %eax
1217 ; SSE-NEXT: cmpb $15, %al
1218 ; SSE-NEXT: sete %al
1221 ; AVX1-LABEL: bool_reduction_v4i64:
1223 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1224 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1225 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1226 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
1227 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1228 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
1229 ; AVX1-NEXT: cmpb $15, %al
1230 ; AVX1-NEXT: sete %al
1231 ; AVX1-NEXT: vzeroupper
1234 ; AVX2-LABEL: bool_reduction_v4i64:
1236 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
1237 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
1238 ; AVX2-NEXT: cmpb $15, %al
1239 ; AVX2-NEXT: sete %al
1240 ; AVX2-NEXT: vzeroupper
1243 ; AVX512-LABEL: bool_reduction_v4i64:
1245 ; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0
1246 ; AVX512-NEXT: kmovd %k0, %eax
1247 ; AVX512-NEXT: andb $15, %al
1248 ; AVX512-NEXT: cmpb $15, %al
1249 ; AVX512-NEXT: sete %al
1250 ; AVX512-NEXT: vzeroupper
; Two halving steps: lanes 2,3 are folded onto lanes 0,1, then lane 1 onto
; lane 0, so lane 0 of %c is the AND of all four compare results.
1252 %a = icmp slt <4 x i64> %x, %y
1253 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1254 %b = and <4 x i1> %s1, %a
1255 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1256 %c = and <4 x i1> %s2, %b
1257 %d = extractelement <4 x i1> %c, i32 0
; AND-reduction of an <8 x i1> mask produced by an unsigned less-or-equal
; compare of two <8 x i32> vectors. In the checks below the SSE, AVX1 and AVX2
; runs form the compare as pminud followed by pcmpeqd against the first
; operand; the AVX512 run uses vpcmpleud directly.
1261 define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
1262 ; SSE-LABEL: bool_reduction_v8i32:
1264 ; SSE-NEXT: pminud %xmm1, %xmm3
1265 ; SSE-NEXT: pcmpeqd %xmm1, %xmm3
1266 ; SSE-NEXT: pminud %xmm0, %xmm2
1267 ; SSE-NEXT: pcmpeqd %xmm0, %xmm2
1268 ; SSE-NEXT: packssdw %xmm3, %xmm2
1269 ; SSE-NEXT: packsswb %xmm0, %xmm2
1270 ; SSE-NEXT: pmovmskb %xmm2, %eax
1271 ; SSE-NEXT: cmpb $-1, %al
1272 ; SSE-NEXT: sete %al
1275 ; AVX1-LABEL: bool_reduction_v8i32:
1277 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1278 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1279 ; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
1280 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2
1281 ; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm1
1282 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1283 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1284 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1285 ; AVX1-NEXT: cmpb $-1, %al
1286 ; AVX1-NEXT: sete %al
1287 ; AVX1-NEXT: vzeroupper
1290 ; AVX2-LABEL: bool_reduction_v8i32:
1292 ; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1
1293 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
1294 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1295 ; AVX2-NEXT: cmpb $-1, %al
1296 ; AVX2-NEXT: sete %al
1297 ; AVX2-NEXT: vzeroupper
1300 ; AVX512-LABEL: bool_reduction_v8i32:
1302 ; AVX512-NEXT: vpcmpleud %ymm1, %ymm0, %k0
1303 ; AVX512-NEXT: kmovd %k0, %eax
1304 ; AVX512-NEXT: cmpb $-1, %al
1305 ; AVX512-NEXT: sete %al
1306 ; AVX512-NEXT: vzeroupper
; Three halving steps (4..7 -> 0..3, 2,3 -> 0,1, 1 -> 0), so lane 0 of %d is
; the AND of all eight compare results.
1308 %a = icmp ule <8 x i32> %x, %y
1309 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1310 %b = and <8 x i1> %s1, %a
1311 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1312 %c = and <8 x i1> %s2, %b
1313 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1314 %d = and <8 x i1> %s3, %c
1315 %e = extractelement <8 x i1> %d, i32 0
; AND-reduction of a <16 x i1> mask produced by an equality compare of two
; <16 x i16> vectors. In the checks below the SSE, AVX1 and AVX2 runs pack the
; word-sized compare results to bytes and test the 16-bit pmovmskb result
; against -1; the AVX512 run uses kortestw followed by setb.
1319 define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
1320 ; SSE-LABEL: bool_reduction_v16i16:
1322 ; SSE-NEXT: pcmpeqw %xmm3, %xmm1
1323 ; SSE-NEXT: pcmpeqw %xmm2, %xmm0
1324 ; SSE-NEXT: packsswb %xmm1, %xmm0
1325 ; SSE-NEXT: pmovmskb %xmm0, %eax
1326 ; SSE-NEXT: cmpw $-1, %ax
1327 ; SSE-NEXT: sete %al
1330 ; AVX1-LABEL: bool_reduction_v16i16:
1332 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1333 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1334 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2
1335 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
1336 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
1337 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1338 ; AVX1-NEXT: cmpw $-1, %ax
1339 ; AVX1-NEXT: sete %al
1340 ; AVX1-NEXT: vzeroupper
1343 ; AVX2-LABEL: bool_reduction_v16i16:
1345 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1346 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1347 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1348 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
1349 ; AVX2-NEXT: cmpw $-1, %ax
1350 ; AVX2-NEXT: sete %al
1351 ; AVX2-NEXT: vzeroupper
1354 ; AVX512-LABEL: bool_reduction_v16i16:
1356 ; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
1357 ; AVX512-NEXT: kortestw %k0, %k0
1358 ; AVX512-NEXT: setb %al
1359 ; AVX512-NEXT: vzeroupper
; Four halving steps (8..15 -> 0..7, 4..7 -> 0..3, 2,3 -> 0,1, 1 -> 0), so
; lane 0 of %e is the AND of all sixteen compare results.
1361 %a = icmp eq <16 x i16> %x, %y
1362 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1363 %b = and <16 x i1> %s1, %a
1364 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1365 %c = and <16 x i1> %s2, %b
1366 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1367 %d = and <16 x i1> %s3, %c
1368 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1369 %e = and <16 x i1> %s4, %d
1370 %f = extractelement <16 x i1> %e, i32 0
1374 define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
1375 ; SSE-LABEL: bool_reduction_v32i8:
1377 ; SSE-NEXT: pcmpeqb %xmm3, %xmm1
1378 ; SSE-NEXT: pcmpeqb %xmm2, %xmm0
1379 ; SSE-NEXT: pand %xmm1, %xmm0
1380 ; SSE-NEXT: pmovmskb %xmm0, %eax
1381 ; SSE-NEXT: cmpw $-1, %ax
1382 ; SSE-NEXT: sete %al
1385 ; AVX1-LABEL: bool_reduction_v32i8:
1387 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1388 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1389 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2
1390 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1391 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
1392 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1393 ; AVX1-NEXT: cmpw $-1, %ax
1394 ; AVX1-NEXT: sete %al
1395 ; AVX1-NEXT: vzeroupper
1398 ; AVX2-LABEL: bool_reduction_v32i8:
1400 ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
1401 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1402 ; AVX2-NEXT: cmpl $-1, %eax
1403 ; AVX2-NEXT: sete %al
1404 ; AVX2-NEXT: vzeroupper
1407 ; AVX512-LABEL: bool_reduction_v32i8:
1409 ; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
1410 ; AVX512-NEXT: kortestd %k0, %k0
1411 ; AVX512-NEXT: setb %al
1412 ; AVX512-NEXT: vzeroupper
1414 %a = icmp eq <32 x i8> %x, %y
1415 %s1 = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1416 %b = and <32 x i1> %s1, %a
1417 %s2 = shufflevector <32 x i1> %b, <32 x i1> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1418 %c = and <32 x i1> %s2, %b
1419 %s3 = shufflevector <32 x i1> %c, <32 x i1> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1420 %d = and <32 x i1> %s3, %c
1421 %s4 = shufflevector <32 x i1> %d, <32 x i1> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1422 %e = and <32 x i1> %s4, %d
1423 %s5 = shufflevector <32 x i1> %e, <32 x i1> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1424 %f = and <32 x i1> %s5, %e
1425 %g = extractelement <32 x i1> %f, i32 0