1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512
; All-of reduction over <2 x double>: each lane of (%a0 ogt %a1) is
; sign-extended to i64, lane 1 is ANDed onto lane 0 via a shuffle, and lane 0
; is extracted as %3. %3 is all-ones only when both lanes compared true and
; zero otherwise.
; NOTE(review): the ret and closing brace are not visible in this excerpt.
8 define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
9 ; SSE-LABEL: test_v2f64_sext:
11 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
12 ; SSE-NEXT: movmskpd %xmm1, %ecx
13 ; SSE-NEXT: xorl %eax, %eax
14 ; SSE-NEXT: cmpl $3, %ecx
19 ; AVX-LABEL: test_v2f64_sext:
21 ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
22 ; AVX-NEXT: xorl %eax, %eax
23 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
24 ; AVX-NEXT: vtestpd %xmm1, %xmm0
25 ; AVX-NEXT: sbbq %rax, %rax
27 %c = fcmp ogt <2 x double> %a0, %a1
28 %s = sext <2 x i1> %c to <2 x i64>
29 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
30 %2 = and <2 x i64> %s, %1
31 %3 = extractelement <2 x i64> %2, i32 0
; All-of reduction over <4 x double>: each lane of (%a0 ogt %a1) is
; sign-extended to i64, then AND-reduced in two shuffle steps (lanes 2,3 onto
; 0,1, then lane 1 onto 0). Lane 0 is extracted as %5, which is all-ones only
; when every lane compared true and zero otherwise.
; NOTE(review): the ret and closing brace are not visible in this excerpt.
35 define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
36 ; SSE-LABEL: test_v4f64_sext:
38 ; SSE-NEXT: cmpltpd %xmm1, %xmm3
39 ; SSE-NEXT: cmpltpd %xmm0, %xmm2
40 ; SSE-NEXT: andpd %xmm3, %xmm2
41 ; SSE-NEXT: movmskpd %xmm2, %ecx
42 ; SSE-NEXT: xorl %eax, %eax
43 ; SSE-NEXT: cmpl $3, %ecx
48 ; AVX1-LABEL: test_v4f64_sext:
50 ; AVX1-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
51 ; AVX1-NEXT: xorl %eax, %eax
52 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
53 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
54 ; AVX1-NEXT: vtestpd %ymm1, %ymm0
55 ; AVX1-NEXT: sbbq %rax, %rax
56 ; AVX1-NEXT: vzeroupper
59 ; AVX2-LABEL: test_v4f64_sext:
61 ; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
62 ; AVX2-NEXT: xorl %eax, %eax
63 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
64 ; AVX2-NEXT: vtestpd %ymm1, %ymm0
65 ; AVX2-NEXT: sbbq %rax, %rax
66 ; AVX2-NEXT: vzeroupper
69 ; AVX512-LABEL: test_v4f64_sext:
71 ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
72 ; AVX512-NEXT: xorl %eax, %eax
73 ; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
74 ; AVX512-NEXT: vtestpd %ymm1, %ymm0
75 ; AVX512-NEXT: sbbq %rax, %rax
76 ; AVX512-NEXT: vzeroupper
78 %c = fcmp ogt <4 x double> %a0, %a1
79 %s = sext <4 x i1> %c to <4 x i64>
80 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
81 %2 = and <4 x i64> %s, %1
82 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
83 %4 = and <4 x i64> %2, %3
84 %5 = extractelement <4 x i64> %4, i64 0
; Variant of the <4 x double> all-of reduction in which the compare result is
; sign-extended to the narrower <4 x i32> instead of <4 x i64>. The mask is
; AND-reduced in two shuffle steps, lane 0 is extracted as %5, and that scalar
; is then sign-extended to i64 as %6.
; NOTE(review): the ret and closing brace are not visible in this excerpt.
88 define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
89 ; SSE-LABEL: test_v4f64_legal_sext:
91 ; SSE-NEXT: cmpltpd %xmm1, %xmm3
92 ; SSE-NEXT: cmpltpd %xmm0, %xmm2
93 ; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
94 ; SSE-NEXT: movmskps %xmm2, %ecx
95 ; SSE-NEXT: xorl %eax, %eax
96 ; SSE-NEXT: cmpl $15, %ecx
101 ; AVX1OR2-LABEL: test_v4f64_legal_sext:
103 ; AVX1OR2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
104 ; AVX1OR2-NEXT: vextractf128 $1, %ymm0, %xmm1
105 ; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
106 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
107 ; AVX1OR2-NEXT: xorl %eax, %eax
108 ; AVX1OR2-NEXT: vtestps %xmm1, %xmm0
109 ; AVX1OR2-NEXT: sbbq %rax, %rax
110 ; AVX1OR2-NEXT: vzeroupper
113 ; AVX512-LABEL: test_v4f64_legal_sext:
115 ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
116 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
117 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z}
118 ; AVX512-NEXT: xorl %eax, %eax
119 ; AVX512-NEXT: vtestps %xmm0, %xmm1
120 ; AVX512-NEXT: sbbq %rax, %rax
121 ; AVX512-NEXT: vzeroupper
123 %c = fcmp ogt <4 x double> %a0, %a1
124 %s = sext <4 x i1> %c to <4 x i32>
125 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
126 %2 = and <4 x i32> %s, %1
127 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
128 %4 = and <4 x i32> %2, %3
129 %5 = extractelement <4 x i32> %4, i64 0
130 %6 = sext i32 %5 to i64
; All-of reduction over <4 x float>: each lane of (%a0 ogt %a1) is
; sign-extended to i32, then AND-reduced in two shuffle steps (lanes 2,3 onto
; 0,1, then lane 1 onto 0). Lane 0 is extracted as %5, which is all-ones only
; when every lane compared true and zero otherwise.
; NOTE(review): the ret and closing brace are not visible in this excerpt.
134 define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
135 ; SSE-LABEL: test_v4f32_sext:
137 ; SSE-NEXT: cmpltps %xmm0, %xmm1
138 ; SSE-NEXT: movmskps %xmm1, %ecx
139 ; SSE-NEXT: xorl %eax, %eax
140 ; SSE-NEXT: cmpl $15, %ecx
142 ; SSE-NEXT: negl %eax
145 ; AVX-LABEL: test_v4f32_sext:
147 ; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
148 ; AVX-NEXT: xorl %eax, %eax
149 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
150 ; AVX-NEXT: vtestps %xmm1, %xmm0
151 ; AVX-NEXT: sbbl %eax, %eax
153 %c = fcmp ogt <4 x float> %a0, %a1
154 %s = sext <4 x i1> %c to <4 x i32>
155 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
156 %2 = and <4 x i32> %s, %1
157 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
158 %4 = and <4 x i32> %2, %3
159 %5 = extractelement <4 x i32> %4, i32 0
; All-of reduction over <8 x float>: each lane of (%a0 ogt %a1) is
; sign-extended to i32, then AND-reduced in three shuffle steps (upper four
; lanes onto the lower four, lanes 2,3 onto 0,1, lane 1 onto 0). Lane 0 is
; extracted as %7, which is all-ones only when every lane compared true and
; zero otherwise.
; NOTE(review): the ret and closing brace are not visible in this excerpt.
163 define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
164 ; SSE-LABEL: test_v8f32_sext:
166 ; SSE-NEXT: cmpltps %xmm1, %xmm3
167 ; SSE-NEXT: cmpltps %xmm0, %xmm2
168 ; SSE-NEXT: andps %xmm3, %xmm2
169 ; SSE-NEXT: movmskps %xmm2, %ecx
170 ; SSE-NEXT: xorl %eax, %eax
171 ; SSE-NEXT: cmpl $15, %ecx
173 ; SSE-NEXT: negl %eax
176 ; AVX1-LABEL: test_v8f32_sext:
178 ; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
179 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
180 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
181 ; AVX1-NEXT: xorl %eax, %eax
182 ; AVX1-NEXT: vtestps %ymm1, %ymm0
183 ; AVX1-NEXT: sbbl %eax, %eax
184 ; AVX1-NEXT: vzeroupper
187 ; AVX2-LABEL: test_v8f32_sext:
189 ; AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
190 ; AVX2-NEXT: xorl %eax, %eax
191 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
192 ; AVX2-NEXT: vtestps %ymm1, %ymm0
193 ; AVX2-NEXT: sbbl %eax, %eax
194 ; AVX2-NEXT: vzeroupper
197 ; AVX512-LABEL: test_v8f32_sext:
199 ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
200 ; AVX512-NEXT: xorl %eax, %eax
201 ; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
202 ; AVX512-NEXT: vtestps %ymm1, %ymm0
203 ; AVX512-NEXT: sbbl %eax, %eax
204 ; AVX512-NEXT: vzeroupper
206 %c = fcmp ogt <8 x float> %a0, %a1
207 %s = sext <8 x i1> %c to <8 x i32>
208 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
209 %2 = and <8 x i32> %s, %1
210 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
211 %4 = and <8 x i32> %2, %3
212 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
213 %6 = and <8 x i32> %4, %5
214 %7 = extractelement <8 x i32> %6, i32 0
; Variant of the <8 x float> all-of reduction in which the compare result is
; sign-extended to the narrower <8 x i16> instead of <8 x i32>. The mask is
; AND-reduced in three shuffle steps, lane 0 is extracted as %7, and that
; scalar is then sign-extended to i32 as %8.
; NOTE(review): the ret and closing brace are not visible in this excerpt.
218 define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
219 ; SSE-LABEL: test_v8f32_legal_sext:
221 ; SSE-NEXT: cmpltps %xmm1, %xmm3
222 ; SSE-NEXT: cmpltps %xmm0, %xmm2
223 ; SSE-NEXT: packssdw %xmm3, %xmm2
224 ; SSE-NEXT: pmovmskb %xmm2, %ecx
225 ; SSE-NEXT: xorl %eax, %eax
226 ; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
228 ; SSE-NEXT: negl %eax
231 ; AVX1OR2-LABEL: test_v8f32_legal_sext:
233 ; AVX1OR2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
234 ; AVX1OR2-NEXT: vextractf128 $1, %ymm0, %xmm1
235 ; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
236 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %ecx
237 ; AVX1OR2-NEXT: xorl %eax, %eax
238 ; AVX1OR2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
239 ; AVX1OR2-NEXT: sete %al
240 ; AVX1OR2-NEXT: negl %eax
241 ; AVX1OR2-NEXT: vzeroupper
244 ; AVX512-LABEL: test_v8f32_legal_sext:
246 ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0
247 ; AVX512-NEXT: vpmovm2w %k0, %xmm0
248 ; AVX512-NEXT: vpmovmskb %xmm0, %ecx
249 ; AVX512-NEXT: xorl %eax, %eax
250 ; AVX512-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
251 ; AVX512-NEXT: sete %al
252 ; AVX512-NEXT: negl %eax
253 ; AVX512-NEXT: vzeroupper
255 %c = fcmp ogt <8 x float> %a0, %a1
256 %s = sext <8 x i1> %c to <8 x i16>
257 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
258 %2 = and <8 x i16> %s, %1
259 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
260 %4 = and <8 x i16> %2, %3
261 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
262 %6 = and <8 x i16> %4, %5
263 %7 = extractelement <8 x i16> %6, i32 0
264 %8 = sext i16 %7 to i32
; All-of reduction over <2 x i64>: each lane of the signed compare
; (%a0 sgt %a1) is sign-extended to i64, lane 1 is ANDed onto lane 0 via a
; shuffle, and lane 0 is extracted as %3. %3 is all-ones only when both lanes
; compared true and zero otherwise.
; NOTE(review): the ret and closing brace are not visible in this excerpt.
268 define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
269 ; SSE2-LABEL: test_v2i64_sext:
271 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
272 ; SSE2-NEXT: pxor %xmm2, %xmm1
273 ; SSE2-NEXT: pxor %xmm2, %xmm0
274 ; SSE2-NEXT: movdqa %xmm0, %xmm2
275 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm2
276 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
277 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
278 ; SSE2-NEXT: pand %xmm2, %xmm1
279 ; SSE2-NEXT: por %xmm0, %xmm1
280 ; SSE2-NEXT: movmskpd %xmm1, %ecx
281 ; SSE2-NEXT: xorl %eax, %eax
282 ; SSE2-NEXT: cmpl $3, %ecx
283 ; SSE2-NEXT: sete %al
284 ; SSE2-NEXT: negq %rax
287 ; SSE42-LABEL: test_v2i64_sext:
289 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
290 ; SSE42-NEXT: movmskpd %xmm0, %ecx
291 ; SSE42-NEXT: xorl %eax, %eax
292 ; SSE42-NEXT: cmpl $3, %ecx
293 ; SSE42-NEXT: sete %al
294 ; SSE42-NEXT: negq %rax
297 ; AVX-LABEL: test_v2i64_sext:
299 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
300 ; AVX-NEXT: xorl %eax, %eax
301 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
302 ; AVX-NEXT: vtestpd %xmm1, %xmm0
303 ; AVX-NEXT: sbbq %rax, %rax
305 %c = icmp sgt <2 x i64> %a0, %a1
306 %s = sext <2 x i1> %c to <2 x i64>
307 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
308 %2 = and <2 x i64> %s, %1
309 %3 = extractelement <2 x i64> %2, i32 0
; All-of reduction over <4 x i64>: each lane of the signed compare
; (%a0 sgt %a1) is sign-extended to i64, then AND-reduced in two shuffle steps
; (lanes 2,3 onto 0,1, then lane 1 onto 0). Lane 0 is extracted as %5, which
; is all-ones only when every lane compared true and zero otherwise.
; NOTE(review): the ret and closing brace are not visible in this excerpt.
313 define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
314 ; SSE2-LABEL: test_v4i64_sext:
316 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
317 ; SSE2-NEXT: pxor %xmm4, %xmm3
318 ; SSE2-NEXT: pxor %xmm4, %xmm1
319 ; SSE2-NEXT: movdqa %xmm1, %xmm5
320 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm5
321 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm1
322 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
323 ; SSE2-NEXT: pand %xmm5, %xmm3
324 ; SSE2-NEXT: por %xmm1, %xmm3
325 ; SSE2-NEXT: pxor %xmm4, %xmm2
326 ; SSE2-NEXT: pxor %xmm4, %xmm0
327 ; SSE2-NEXT: movdqa %xmm0, %xmm1
328 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
329 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
330 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
331 ; SSE2-NEXT: pand %xmm1, %xmm2
332 ; SSE2-NEXT: por %xmm0, %xmm2
333 ; SSE2-NEXT: pand %xmm3, %xmm2
334 ; SSE2-NEXT: movmskpd %xmm2, %ecx
335 ; SSE2-NEXT: xorl %eax, %eax
336 ; SSE2-NEXT: cmpl $3, %ecx
337 ; SSE2-NEXT: sete %al
338 ; SSE2-NEXT: negq %rax
341 ; SSE42-LABEL: test_v4i64_sext:
343 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm1
344 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
345 ; SSE42-NEXT: pand %xmm1, %xmm0
346 ; SSE42-NEXT: movmskpd %xmm0, %ecx
347 ; SSE42-NEXT: xorl %eax, %eax
348 ; SSE42-NEXT: cmpl $3, %ecx
349 ; SSE42-NEXT: sete %al
350 ; SSE42-NEXT: negq %rax
353 ; AVX1-LABEL: test_v4i64_sext:
355 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
356 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
357 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
358 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
359 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
360 ; AVX1-NEXT: xorl %eax, %eax
361 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
362 ; AVX1-NEXT: vtestpd %xmm1, %xmm0
363 ; AVX1-NEXT: sbbq %rax, %rax
364 ; AVX1-NEXT: vzeroupper
367 ; AVX2-LABEL: test_v4i64_sext:
369 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
370 ; AVX2-NEXT: xorl %eax, %eax
371 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
372 ; AVX2-NEXT: vtestpd %ymm1, %ymm0
373 ; AVX2-NEXT: sbbq %rax, %rax
374 ; AVX2-NEXT: vzeroupper
377 ; AVX512-LABEL: test_v4i64_sext:
379 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
380 ; AVX512-NEXT: xorl %eax, %eax
381 ; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
382 ; AVX512-NEXT: vtestpd %ymm1, %ymm0
383 ; AVX512-NEXT: sbbq %rax, %rax
384 ; AVX512-NEXT: vzeroupper
386 %c = icmp sgt <4 x i64> %a0, %a1
387 %s = sext <4 x i1> %c to <4 x i64>
388 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
389 %2 = and <4 x i64> %s, %1
390 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
391 %4 = and <4 x i64> %2, %3
392 %5 = extractelement <4 x i64> %4, i64 0
; Variant of the <4 x i64> all-of reduction in which the signed-compare result
; is sign-extended to the narrower <4 x i32> instead of <4 x i64>. The mask is
; AND-reduced in two shuffle steps, lane 0 is extracted as %5, and that scalar
; is then sign-extended to i64 as %6.
; NOTE(review): the ret and closing brace are not visible in this excerpt.
396 define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
397 ; SSE2-LABEL: test_v4i64_legal_sext:
399 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
400 ; SSE2-NEXT: pxor %xmm4, %xmm3
401 ; SSE2-NEXT: pxor %xmm4, %xmm1
402 ; SSE2-NEXT: movdqa %xmm1, %xmm5
403 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
404 ; SSE2-NEXT: pxor %xmm4, %xmm2
405 ; SSE2-NEXT: pxor %xmm4, %xmm0
406 ; SSE2-NEXT: movdqa %xmm0, %xmm4
407 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
408 ; SSE2-NEXT: movdqa %xmm4, %xmm6
409 ; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm5[0,2]
410 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm1
411 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
412 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
413 ; SSE2-NEXT: andps %xmm6, %xmm0
414 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3]
415 ; SSE2-NEXT: orps %xmm0, %xmm4
416 ; SSE2-NEXT: movmskps %xmm4, %ecx
417 ; SSE2-NEXT: xorl %eax, %eax
418 ; SSE2-NEXT: cmpl $15, %ecx
419 ; SSE2-NEXT: sete %al
420 ; SSE2-NEXT: negq %rax
423 ; SSE42-LABEL: test_v4i64_legal_sext:
425 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm1
426 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
427 ; SSE42-NEXT: packssdw %xmm1, %xmm0
428 ; SSE42-NEXT: movmskps %xmm0, %ecx
429 ; SSE42-NEXT: xorl %eax, %eax
430 ; SSE42-NEXT: cmpl $15, %ecx
431 ; SSE42-NEXT: sete %al
432 ; SSE42-NEXT: negq %rax
435 ; AVX1-LABEL: test_v4i64_legal_sext:
437 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
438 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
439 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
440 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
441 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
442 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
443 ; AVX1-NEXT: xorl %eax, %eax
444 ; AVX1-NEXT: vtestps %xmm1, %xmm0
445 ; AVX1-NEXT: sbbq %rax, %rax
446 ; AVX1-NEXT: vzeroupper
449 ; AVX2-LABEL: test_v4i64_legal_sext:
451 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
452 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
453 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
454 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
455 ; AVX2-NEXT: xorl %eax, %eax
456 ; AVX2-NEXT: vtestps %xmm1, %xmm0
457 ; AVX2-NEXT: sbbq %rax, %rax
458 ; AVX2-NEXT: vzeroupper
461 ; AVX512-LABEL: test_v4i64_legal_sext:
463 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
464 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
465 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z}
466 ; AVX512-NEXT: xorl %eax, %eax
467 ; AVX512-NEXT: vtestps %xmm0, %xmm1
468 ; AVX512-NEXT: sbbq %rax, %rax
469 ; AVX512-NEXT: vzeroupper
471 %c = icmp sgt <4 x i64> %a0, %a1
472 %s = sext <4 x i1> %c to <4 x i32>
473 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
474 %2 = and <4 x i32> %s, %1
475 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
476 %4 = and <4 x i32> %2, %3
477 %5 = extractelement <4 x i32> %4, i64 0
478 %6 = sext i32 %5 to i64
; All-of reduction over <4 x i32>: each lane of the signed compare
; (%a0 sgt %a1) is sign-extended to i32, then AND-reduced in two shuffle steps
; (lanes 2,3 onto 0,1, then lane 1 onto 0). Lane 0 is extracted as %5, which
; is all-ones only when every lane compared true and zero otherwise.
; NOTE(review): the ret and closing brace are not visible in this excerpt.
482 define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
483 ; SSE-LABEL: test_v4i32_sext:
485 ; SSE-NEXT: pcmpgtd %xmm1, %xmm0
486 ; SSE-NEXT: movmskps %xmm0, %ecx
487 ; SSE-NEXT: xorl %eax, %eax
488 ; SSE-NEXT: cmpl $15, %ecx
490 ; SSE-NEXT: negl %eax
493 ; AVX-LABEL: test_v4i32_sext:
495 ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
496 ; AVX-NEXT: xorl %eax, %eax
497 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
498 ; AVX-NEXT: vtestps %xmm1, %xmm0
499 ; AVX-NEXT: sbbl %eax, %eax
501 %c = icmp sgt <4 x i32> %a0, %a1
502 %s = sext <4 x i1> %c to <4 x i32>
503 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
504 %2 = and <4 x i32> %s, %1
505 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
506 %4 = and <4 x i32> %2, %3
507 %5 = extractelement <4 x i32> %4, i32 0
; All-of reduction over <8 x i32>: each lane of the signed compare
; (%a0 sgt %a1) is sign-extended to i32, then AND-reduced in three shuffle
; steps (upper four lanes onto the lower four, lanes 2,3 onto 0,1, lane 1 onto
; 0). Lane 0 is extracted as %7, which is all-ones only when every lane
; compared true and zero otherwise.
; NOTE(review): the ret and closing brace are not visible in this excerpt.
511 define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
512 ; SSE-LABEL: test_v8i32_sext:
514 ; SSE-NEXT: pcmpgtd %xmm3, %xmm1
515 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0
516 ; SSE-NEXT: pand %xmm1, %xmm0
517 ; SSE-NEXT: movmskps %xmm0, %ecx
518 ; SSE-NEXT: xorl %eax, %eax
519 ; SSE-NEXT: cmpl $15, %ecx
521 ; SSE-NEXT: negl %eax
524 ; AVX1-LABEL: test_v8i32_sext:
526 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
527 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
528 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
529 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
530 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
531 ; AVX1-NEXT: xorl %eax, %eax
532 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
533 ; AVX1-NEXT: vtestps %xmm1, %xmm0
534 ; AVX1-NEXT: sbbl %eax, %eax
535 ; AVX1-NEXT: vzeroupper
538 ; AVX2-LABEL: test_v8i32_sext:
540 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
541 ; AVX2-NEXT: xorl %eax, %eax
542 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
543 ; AVX2-NEXT: vtestps %ymm1, %ymm0
544 ; AVX2-NEXT: sbbl %eax, %eax
545 ; AVX2-NEXT: vzeroupper
548 ; AVX512-LABEL: test_v8i32_sext:
550 ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
551 ; AVX512-NEXT: xorl %eax, %eax
552 ; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
553 ; AVX512-NEXT: vtestps %ymm1, %ymm0
554 ; AVX512-NEXT: sbbl %eax, %eax
555 ; AVX512-NEXT: vzeroupper
557 %c = icmp sgt <8 x i32> %a0, %a1
558 %s = sext <8 x i1> %c to <8 x i32>
559 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
560 %2 = and <8 x i32> %s, %1
561 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
562 %4 = and <8 x i32> %2, %3
563 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
564 %6 = and <8 x i32> %4, %5
565 %7 = extractelement <8 x i32> %6, i32 0
; Variant of the <8 x i32> all-of reduction in which the signed-compare result
; is sign-extended to the narrower <8 x i16> instead of <8 x i32>. The mask is
; AND-reduced in three shuffle steps, lane 0 is extracted as %7, and that
; scalar is then sign-extended to i32 as %8.
; NOTE(review): the ret and closing brace are not visible in this excerpt.
569 define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
570 ; SSE-LABEL: test_v8i32_legal_sext:
572 ; SSE-NEXT: pcmpgtd %xmm3, %xmm1
573 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0
574 ; SSE-NEXT: packssdw %xmm1, %xmm0
575 ; SSE-NEXT: pmovmskb %xmm0, %ecx
576 ; SSE-NEXT: xorl %eax, %eax
577 ; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
579 ; SSE-NEXT: negl %eax
582 ; AVX1-LABEL: test_v8i32_legal_sext:
584 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
585 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
586 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
587 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
588 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
589 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
590 ; AVX1-NEXT: xorl %eax, %eax
591 ; AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
592 ; AVX1-NEXT: sete %al
593 ; AVX1-NEXT: negl %eax
594 ; AVX1-NEXT: vzeroupper
597 ; AVX2-LABEL: test_v8i32_legal_sext:
599 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
600 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
601 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
602 ; AVX2-NEXT: vpmovmskb %xmm0, %ecx
603 ; AVX2-NEXT: xorl %eax, %eax
604 ; AVX2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
605 ; AVX2-NEXT: sete %al
606 ; AVX2-NEXT: negl %eax
607 ; AVX2-NEXT: vzeroupper
610 ; AVX512-LABEL: test_v8i32_legal_sext:
612 ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
613 ; AVX512-NEXT: vpmovm2w %k0, %xmm0
614 ; AVX512-NEXT: vpmovmskb %xmm0, %ecx
615 ; AVX512-NEXT: xorl %eax, %eax
616 ; AVX512-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
617 ; AVX512-NEXT: sete %al
618 ; AVX512-NEXT: negl %eax
619 ; AVX512-NEXT: vzeroupper
621 %c = icmp sgt <8 x i32> %a0, %a1
622 %s = sext <8 x i1> %c to <8 x i16>
623 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
624 %2 = and <8 x i16> %s, %1
625 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
626 %4 = and <8 x i16> %2, %3
627 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
628 %6 = and <8 x i16> %4, %5
629 %7 = extractelement <8 x i16> %6, i32 0
630 %8 = sext i16 %7 to i32
; All-of reduction over <8 x i16>: each lane of the signed compare
; (%a0 sgt %a1) is sign-extended to i16, then AND-reduced in three shuffle
; steps (upper four lanes onto the lower four, lanes 2,3 onto 0,1, lane 1 onto
; 0). Lane 0 is extracted as %7, which is all-ones only when every lane
; compared true and zero otherwise.
; NOTE(review): the ret and closing brace are not visible in this excerpt.
634 define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
635 ; SSE-LABEL: test_v8i16_sext:
637 ; SSE-NEXT: pcmpgtw %xmm1, %xmm0
638 ; SSE-NEXT: pmovmskb %xmm0, %ecx
639 ; SSE-NEXT: xorl %eax, %eax
640 ; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
642 ; SSE-NEXT: negl %eax
643 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
646 ; AVX-LABEL: test_v8i16_sext:
648 ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
649 ; AVX-NEXT: vpmovmskb %xmm0, %ecx
650 ; AVX-NEXT: xorl %eax, %eax
651 ; AVX-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
653 ; AVX-NEXT: negl %eax
654 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
656 %c = icmp sgt <8 x i16> %a0, %a1
657 %s = sext <8 x i1> %c to <8 x i16>
658 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
659 %2 = and <8 x i16> %s, %1
660 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
661 %4 = and <8 x i16> %2, %3
662 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
663 %6 = and <8 x i16> %4, %5
664 %7 = extractelement <8 x i16> %6, i32 0
; All-of reduction over <16 x i16>: each lane of the signed compare
; (%a0 sgt %a1) is sign-extended to i16, then AND-reduced in four shuffle
; steps that fold 8, 4, 2 and finally 1 upper lane(s) onto the low lanes.
; Lane 0 is extracted as %9, which is all-ones only when every lane compared
; true and zero otherwise.
; NOTE(review): the ret and closing brace are not visible in this excerpt.
668 define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
669 ; SSE-LABEL: test_v16i16_sext:
671 ; SSE-NEXT: pcmpgtw %xmm3, %xmm1
672 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0
673 ; SSE-NEXT: pand %xmm1, %xmm0
674 ; SSE-NEXT: pmovmskb %xmm0, %ecx
675 ; SSE-NEXT: xorl %eax, %eax
676 ; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
678 ; SSE-NEXT: negl %eax
679 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
682 ; AVX1-LABEL: test_v16i16_sext:
684 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
685 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
686 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
687 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
688 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
689 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
690 ; AVX1-NEXT: xorl %eax, %eax
691 ; AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
692 ; AVX1-NEXT: sete %al
693 ; AVX1-NEXT: negl %eax
694 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
695 ; AVX1-NEXT: vzeroupper
698 ; AVX2-LABEL: test_v16i16_sext:
700 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
701 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
702 ; AVX2-NEXT: xorl %eax, %eax
703 ; AVX2-NEXT: cmpl $-1, %ecx
704 ; AVX2-NEXT: sete %al
705 ; AVX2-NEXT: negl %eax
706 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
707 ; AVX2-NEXT: vzeroupper
710 ; AVX512-LABEL: test_v16i16_sext:
712 ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
713 ; AVX512-NEXT: vpmovmskb %ymm0, %ecx
714 ; AVX512-NEXT: xorl %eax, %eax
715 ; AVX512-NEXT: cmpl $-1, %ecx
716 ; AVX512-NEXT: sete %al
717 ; AVX512-NEXT: negl %eax
718 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
719 ; AVX512-NEXT: vzeroupper
721 %c = icmp sgt <16 x i16> %a0, %a1
722 %s = sext <16 x i1> %c to <16 x i16>
723 %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
724 %2 = and <16 x i16> %s, %1
725 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
726 %4 = and <16 x i16> %2, %3
727 %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
728 %6 = and <16 x i16> %4, %5
729 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
730 %8 = and <16 x i16> %6, %7
731 %9 = extractelement <16 x i16> %8, i32 0
; Variant of the <16 x i16> all-of reduction in which the signed-compare
; result is sign-extended to the narrower <16 x i8> instead of <16 x i16>.
; The mask is AND-reduced in four shuffle steps, lane 0 is extracted as %9,
; and that scalar is then sign-extended to i16 as %10.
; NOTE(review): the ret and closing brace are not visible in this excerpt.
735 define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
736 ; SSE-LABEL: test_v16i16_legal_sext:
738 ; SSE-NEXT: pcmpgtw %xmm3, %xmm1
739 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0
740 ; SSE-NEXT: packsswb %xmm1, %xmm0
741 ; SSE-NEXT: pmovmskb %xmm0, %eax
742 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
745 ; SSE-NEXT: movsbl %al, %eax
746 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
749 ; AVX1-LABEL: test_v16i16_legal_sext:
751 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
752 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
753 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
754 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
755 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
756 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
757 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
758 ; AVX1-NEXT: sete %al
759 ; AVX1-NEXT: negb %al
760 ; AVX1-NEXT: movsbl %al, %eax
761 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
762 ; AVX1-NEXT: vzeroupper
765 ; AVX2-LABEL: test_v16i16_legal_sext:
767 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
768 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
769 ; AVX2-NEXT: cmpl $-1, %eax
770 ; AVX2-NEXT: sete %al
771 ; AVX2-NEXT: negb %al
772 ; AVX2-NEXT: movsbl %al, %eax
773 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
774 ; AVX2-NEXT: vzeroupper
777 ; AVX512-LABEL: test_v16i16_legal_sext:
779 ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
780 ; AVX512-NEXT: vpmovm2b %k0, %xmm0
781 ; AVX512-NEXT: vpmovmskb %xmm0, %eax
782 ; AVX512-NEXT: cmpl $65535, %eax # imm = 0xFFFF
783 ; AVX512-NEXT: sete %al
784 ; AVX512-NEXT: negb %al
785 ; AVX512-NEXT: movsbl %al, %eax
786 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
787 ; AVX512-NEXT: vzeroupper
789 %c = icmp sgt <16 x i16> %a0, %a1
790 %s = sext <16 x i1> %c to <16 x i8>
791 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
792 %2 = and <16 x i8> %s, %1
793 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
794 %4 = and <16 x i8> %2, %3
795 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
796 %6 = and <16 x i8> %4, %5
797 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
798 %8 = and <16 x i8> %6, %7
799 %9 = extractelement <16 x i8> %8, i32 0
800 %10 = sext i8 %9 to i16
; All-of reduction over <16 x i8>: each lane of the signed compare
; (%a0 sgt %a1) is sign-extended to i8, then AND-reduced in four shuffle steps
; that fold 8, 4, 2 and finally 1 upper lane(s) onto the low lanes. Lane 0 is
; extracted as %9, which is all-ones only when every lane compared true and
; zero otherwise.
; NOTE(review): the ret and closing brace are not visible in this excerpt.
804 define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
805 ; SSE-LABEL: test_v16i8_sext:
807 ; SSE-NEXT: pcmpgtb %xmm1, %xmm0
808 ; SSE-NEXT: pmovmskb %xmm0, %eax
809 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
814 ; AVX-LABEL: test_v16i8_sext:
816 ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
817 ; AVX-NEXT: vpmovmskb %xmm0, %eax
818 ; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
822 %c = icmp sgt <16 x i8> %a0, %a1
823 %s = sext <16 x i1> %c to <16 x i8>
824 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
825 %2 = and <16 x i8> %s, %1
826 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
827 %4 = and <16 x i8> %2, %3
828 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
829 %6 = and <16 x i8> %4, %5
830 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
831 %8 = and <16 x i8> %6, %7
832 %9 = extractelement <16 x i8> %8, i32 0
836 define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
837 ; SSE-LABEL: test_v32i8_sext:
839 ; SSE-NEXT: pcmpgtb %xmm3, %xmm1
840 ; SSE-NEXT: pcmpgtb %xmm2, %xmm0
841 ; SSE-NEXT: pand %xmm1, %xmm0
842 ; SSE-NEXT: pmovmskb %xmm0, %eax
843 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
848 ; AVX1-LABEL: test_v32i8_sext:
850 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
851 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
852 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
853 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
854 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
855 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
856 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
857 ; AVX1-NEXT: sete %al
858 ; AVX1-NEXT: negb %al
859 ; AVX1-NEXT: vzeroupper
862 ; AVX2-LABEL: test_v32i8_sext:
864 ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
865 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
866 ; AVX2-NEXT: cmpl $-1, %eax
867 ; AVX2-NEXT: sete %al
868 ; AVX2-NEXT: negb %al
869 ; AVX2-NEXT: vzeroupper
872 ; AVX512-LABEL: test_v32i8_sext:
874 ; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
875 ; AVX512-NEXT: vpmovmskb %ymm0, %eax
876 ; AVX512-NEXT: cmpl $-1, %eax
877 ; AVX512-NEXT: sete %al
878 ; AVX512-NEXT: negb %al
879 ; AVX512-NEXT: vzeroupper
881 %c = icmp sgt <32 x i8> %a0, %a1
882 %s = sext <32 x i1> %c to <32 x i8>
883 %1 = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
884 %2 = and <32 x i8> %s, %1
885 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
886 %4 = and <32 x i8> %2, %3
887 %5 = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
888 %6 = and <32 x i8> %4, %5
889 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
890 %8 = and <32 x i8> %6, %7
891 %9 = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
892 %10 = and <32 x i8> %8, %9
893 %11 = extractelement <32 x i8> %10, i32 0
897 ; Should not "MOVMSK(PCMPEQ(..)) -> PTESTZ(..)" when cmp result has multiple uses.
; The byte-equality mask of %x and %y has two users in the IR below: the
; all-ones comparison and the false operand of the select. Each expected
; sequence materialises the mask in a general-purpose register (%ecx) so it
; can feed both the flag-setting test and the conditional move.
; %z is not referenced by the visible body.
898 define i32 @test_v32i8_muti_uses(<32 x i8> %x, <32 x i8>%y, i32 %z) {
899 ; SSE-LABEL: test_v32i8_muti_uses:
901 ; SSE-NEXT: pcmpeqb %xmm2, %xmm0
902 ; SSE-NEXT: pmovmskb %xmm0, %eax
903 ; SSE-NEXT: pcmpeqb %xmm3, %xmm1
904 ; SSE-NEXT: pmovmskb %xmm1, %ecx
905 ; SSE-NEXT: shll $16, %ecx
906 ; SSE-NEXT: orl %eax, %ecx
907 ; SSE-NEXT: cmpl $-1, %ecx
908 ; SSE-NEXT: movl $16, %eax
909 ; SSE-NEXT: cmovnel %ecx, %eax
912 ; AVX1-LABEL: test_v32i8_muti_uses:
914 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
915 ; AVX1-NEXT: vpmovmskb %xmm2, %eax
916 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
917 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
918 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
919 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
920 ; AVX1-NEXT: shll $16, %ecx
921 ; AVX1-NEXT: orl %eax, %ecx
922 ; AVX1-NEXT: cmpl $-1, %ecx
923 ; AVX1-NEXT: movl $16, %eax
924 ; AVX1-NEXT: cmovnel %ecx, %eax
925 ; AVX1-NEXT: vzeroupper
928 ; AVX2-LABEL: test_v32i8_muti_uses:
930 ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
931 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
932 ; AVX2-NEXT: cmpl $-1, %ecx
933 ; AVX2-NEXT: movl $16, %eax
934 ; AVX2-NEXT: cmovnel %ecx, %eax
935 ; AVX2-NEXT: vzeroupper
938 ; AVX512-LABEL: test_v32i8_muti_uses:
940 ; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
941 ; AVX512-NEXT: kortestd %k0, %k0
942 ; AVX512-NEXT: kmovd %k0, %ecx
943 ; AVX512-NEXT: movl $16, %eax
944 ; AVX512-NEXT: cmovael %ecx, %eax
945 ; AVX512-NEXT: vzeroupper
; %b is the 32-bit per-byte equality mask; %res is 16 when every lane matched
; (mask == -1) and the raw mask otherwise.
947 %a = icmp eq <32 x i8> %x, %y
948 %b = bitcast <32 x i1> %a to i32
949 %c = icmp eq i32 %b, -1
950 %res = select i1 %c, i32 16, i32 %b
; AND-reduction of a <2 x i1> "fcmp ogt" mask: lane 1 is shuffled into lane 0,
; ANDed with the original mask, and lane 0 is extracted.
; The SSE and AVX512 expectations compare the 2-bit mask against 3; the
; AVX1OR2 expectation tests against an all-ones vector with vtestpd.
954 define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) {
955 ; SSE-LABEL: bool_reduction_v2f64:
957 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
958 ; SSE-NEXT: movmskpd %xmm1, %eax
959 ; SSE-NEXT: cmpl $3, %eax
963 ; AVX1OR2-LABEL: bool_reduction_v2f64:
965 ; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
966 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
967 ; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0
968 ; AVX1OR2-NEXT: setb %al
971 ; AVX512-LABEL: bool_reduction_v2f64:
973 ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0
974 ; AVX512-NEXT: kmovd %k0, %eax
975 ; AVX512-NEXT: cmpb $3, %al
976 ; AVX512-NEXT: sete %al
978 %a = fcmp ogt <2 x double> %x, %y
979 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
980 %c = and <2 x i1> %a, %b
981 %d = extractelement <2 x i1> %c, i32 0
; AND-reduction of a <4 x i1> "fcmp oeq" mask in two halving steps
; (lanes 2,3 onto 0,1, then lane 1 onto 0); lane 0 of %c is the AND of all
; four compare results.
; The SSE and AVX512 expectations compare the 4-bit mask against 15.
985 define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) {
986 ; SSE-LABEL: bool_reduction_v4f32:
988 ; SSE-NEXT: cmpeqps %xmm1, %xmm0
989 ; SSE-NEXT: movmskps %xmm0, %eax
990 ; SSE-NEXT: cmpl $15, %eax
994 ; AVX1OR2-LABEL: bool_reduction_v4f32:
996 ; AVX1OR2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
997 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
998 ; AVX1OR2-NEXT: vtestps %xmm1, %xmm0
999 ; AVX1OR2-NEXT: setb %al
1000 ; AVX1OR2-NEXT: retq
1002 ; AVX512-LABEL: bool_reduction_v4f32:
1004 ; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0
1005 ; AVX512-NEXT: kmovd %k0, %eax
1006 ; AVX512-NEXT: cmpb $15, %al
1007 ; AVX512-NEXT: sete %al
1009 %a = fcmp oeq <4 x float> %x, %y
1010 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1011 %b = and <4 x i1> %s1, %a
1012 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1013 %c = and <4 x i1> %s2, %b
1014 %d = extractelement <4 x i1> %c, i32 0
; AND-reduction of a <4 x i1> "fcmp oge" mask over 256-bit double inputs,
; using the same two halving steps as the other 4-lane reductions.
; The SSE expectation compares each 128-bit half separately and merges the
; two results with shufps before a single movmskps.
1018 define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) {
1019 ; SSE-LABEL: bool_reduction_v4f64:
1021 ; SSE-NEXT: cmplepd %xmm1, %xmm3
1022 ; SSE-NEXT: cmplepd %xmm0, %xmm2
1023 ; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
1024 ; SSE-NEXT: movmskps %xmm2, %eax
1025 ; SSE-NEXT: cmpl $15, %eax
1026 ; SSE-NEXT: sete %al
1029 ; AVX1-LABEL: bool_reduction_v4f64:
1031 ; AVX1-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
1032 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1033 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
1034 ; AVX1-NEXT: vtestpd %ymm1, %ymm0
1035 ; AVX1-NEXT: setb %al
1036 ; AVX1-NEXT: vzeroupper
1039 ; AVX2-LABEL: bool_reduction_v4f64:
1041 ; AVX2-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
1042 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
1043 ; AVX2-NEXT: vtestpd %ymm1, %ymm0
1044 ; AVX2-NEXT: setb %al
1045 ; AVX2-NEXT: vzeroupper
1048 ; AVX512-LABEL: bool_reduction_v4f64:
1050 ; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k0
1051 ; AVX512-NEXT: kmovd %k0, %eax
1052 ; AVX512-NEXT: cmpb $15, %al
1053 ; AVX512-NEXT: sete %al
1054 ; AVX512-NEXT: vzeroupper
1056 %a = fcmp oge <4 x double> %x, %y
1057 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1058 %b = and <4 x i1> %s1, %a
1059 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1060 %c = and <4 x i1> %s2, %b
1061 %d = extractelement <4 x i1> %c, i32 0
; AND-reduction of an <8 x i1> "fcmp une" mask in three halving steps
; (4, 2, then 1 lane); lane 0 of %d is the AND of all eight compare results.
; The SSE expectation packs the two 128-bit compare results down to bytes
; and compares the low byte of the pmovmskb result against -1.
1065 define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) {
1066 ; SSE-LABEL: bool_reduction_v8f32:
1068 ; SSE-NEXT: cmpneqps %xmm3, %xmm1
1069 ; SSE-NEXT: cmpneqps %xmm2, %xmm0
1070 ; SSE-NEXT: packssdw %xmm1, %xmm0
1071 ; SSE-NEXT: packsswb %xmm0, %xmm0
1072 ; SSE-NEXT: pmovmskb %xmm0, %eax
1073 ; SSE-NEXT: cmpb $-1, %al
1074 ; SSE-NEXT: sete %al
1077 ; AVX1-LABEL: bool_reduction_v8f32:
1079 ; AVX1-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0
1080 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1081 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
1082 ; AVX1-NEXT: vtestps %ymm1, %ymm0
1083 ; AVX1-NEXT: setb %al
1084 ; AVX1-NEXT: vzeroupper
1087 ; AVX2-LABEL: bool_reduction_v8f32:
1089 ; AVX2-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0
1090 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
1091 ; AVX2-NEXT: vtestps %ymm1, %ymm0
1092 ; AVX2-NEXT: setb %al
1093 ; AVX2-NEXT: vzeroupper
1096 ; AVX512-LABEL: bool_reduction_v8f32:
1098 ; AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k0
1099 ; AVX512-NEXT: kmovd %k0, %eax
1100 ; AVX512-NEXT: cmpb $-1, %al
1101 ; AVX512-NEXT: sete %al
1102 ; AVX512-NEXT: vzeroupper
1104 %a = fcmp une <8 x float> %x, %y
1105 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1106 %b = and <8 x i1> %s1, %a
1107 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1108 %c = and <8 x i1> %s2, %b
1109 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1110 %d = and <8 x i1> %s3, %c
1111 %e = extractelement <8 x i1> %d, i32 0
; AND-reduction of a <2 x i1> "icmp ne" mask: true only when both 64-bit
; lanes differ.
; The SSE2, SSE42 and AVX1OR2 expectations use the inverted form: they build
; the equality mask and check that it is empty (test + sete). The SSE2
; sequence forms the 64-bit equality from pcmpeqd plus a pshufd/pand of the
; two 32-bit halves. The AVX512 expectation compares the "ne" mask against 3.
1115 define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
1116 ; SSE2-LABEL: bool_reduction_v2i64:
1118 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
1119 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
1120 ; SSE2-NEXT: pand %xmm0, %xmm1
1121 ; SSE2-NEXT: movmskpd %xmm1, %eax
1122 ; SSE2-NEXT: testl %eax, %eax
1123 ; SSE2-NEXT: sete %al
1126 ; SSE42-LABEL: bool_reduction_v2i64:
1128 ; SSE42-NEXT: pcmpeqq %xmm1, %xmm0
1129 ; SSE42-NEXT: movmskpd %xmm0, %eax
1130 ; SSE42-NEXT: testl %eax, %eax
1131 ; SSE42-NEXT: sete %al
1134 ; AVX1OR2-LABEL: bool_reduction_v2i64:
1136 ; AVX1OR2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
1137 ; AVX1OR2-NEXT: vtestpd %xmm0, %xmm0
1138 ; AVX1OR2-NEXT: sete %al
1139 ; AVX1OR2-NEXT: retq
1141 ; AVX512-LABEL: bool_reduction_v2i64:
1143 ; AVX512-NEXT: vpcmpneqq %xmm1, %xmm0, %k0
1144 ; AVX512-NEXT: kmovd %k0, %eax
1145 ; AVX512-NEXT: cmpb $3, %al
1146 ; AVX512-NEXT: sete %al
1148 %a = icmp ne <2 x i64> %x, %y
1149 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
1150 %c = and <2 x i1> %a, %b
1151 %d = extractelement <2 x i1> %c, i32 0
; AND-reduction of a <4 x i1> "icmp ugt" mask over 32-bit lanes.
; The SSE2 expectation xors both operands with 0x80000000 so the unsigned
; compare can be done with the signed pcmpgtd, then compares the mask with 15.
; The SSE42 and AVX1OR2 expectations use the inverted form: pminud + pcmpeqd
; yields the "x <= y" mask, which must be empty (test + sete).
1155 define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
1156 ; SSE2-LABEL: bool_reduction_v4i32:
1158 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
1159 ; SSE2-NEXT: pxor %xmm2, %xmm1
1160 ; SSE2-NEXT: pxor %xmm2, %xmm0
1161 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
1162 ; SSE2-NEXT: movmskps %xmm0, %eax
1163 ; SSE2-NEXT: cmpl $15, %eax
1164 ; SSE2-NEXT: sete %al
1167 ; SSE42-LABEL: bool_reduction_v4i32:
1169 ; SSE42-NEXT: pminud %xmm0, %xmm1
1170 ; SSE42-NEXT: pcmpeqd %xmm0, %xmm1
1171 ; SSE42-NEXT: movmskps %xmm1, %eax
1172 ; SSE42-NEXT: testl %eax, %eax
1173 ; SSE42-NEXT: sete %al
1176 ; AVX1OR2-LABEL: bool_reduction_v4i32:
1178 ; AVX1OR2-NEXT: vpminud %xmm1, %xmm0, %xmm1
1179 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1180 ; AVX1OR2-NEXT: vtestps %xmm0, %xmm0
1181 ; AVX1OR2-NEXT: sete %al
1182 ; AVX1OR2-NEXT: retq
1184 ; AVX512-LABEL: bool_reduction_v4i32:
1186 ; AVX512-NEXT: vpcmpnleud %xmm1, %xmm0, %k0
1187 ; AVX512-NEXT: kmovd %k0, %eax
1188 ; AVX512-NEXT: cmpb $15, %al
1189 ; AVX512-NEXT: sete %al
1191 %a = icmp ugt <4 x i32> %x, %y
1192 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1193 %b = and <4 x i1> %s1, %a
1194 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1195 %c = and <4 x i1> %s2, %b
1196 %d = extractelement <4 x i1> %c, i32 0
; AND-reduction of an <8 x i1> "icmp slt" mask over 16-bit lanes, in three
; halving steps.
; The SSE and AVX1OR2 expectations narrow the word mask to bytes with
; packsswb so that the low 8 bits of pmovmskb carry one bit per lane, then
; compare that byte against -1.
1200 define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) {
1201 ; SSE-LABEL: bool_reduction_v8i16:
1203 ; SSE-NEXT: pcmpgtw %xmm0, %xmm1
1204 ; SSE-NEXT: packsswb %xmm1, %xmm1
1205 ; SSE-NEXT: pmovmskb %xmm1, %eax
1206 ; SSE-NEXT: cmpb $-1, %al
1207 ; SSE-NEXT: sete %al
1210 ; AVX1OR2-LABEL: bool_reduction_v8i16:
1212 ; AVX1OR2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1213 ; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
1214 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
1215 ; AVX1OR2-NEXT: cmpb $-1, %al
1216 ; AVX1OR2-NEXT: sete %al
1217 ; AVX1OR2-NEXT: retq
1219 ; AVX512-LABEL: bool_reduction_v8i16:
1221 ; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k0
1222 ; AVX512-NEXT: kmovd %k0, %eax
1223 ; AVX512-NEXT: cmpb $-1, %al
1224 ; AVX512-NEXT: sete %al
1226 %a = icmp slt <8 x i16> %x, %y
1227 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1228 %b = and <8 x i1> %s1, %a
1229 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1230 %c = and <8 x i1> %s2, %b
1231 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1232 %d = and <8 x i1> %s3, %c
1233 %e = extractelement <8 x i1> %d, i32 0
; AND-reduction of a <16 x i1> "icmp sgt" mask over byte lanes, in four
; halving steps (8, 4, 2, then 1 lane).
; The SSE and AVX1OR2 expectations compare the 16-bit pmovmskb result with
; 0xFFFF; the AVX512 expectation uses kortestw and reads the carry flag.
1237 define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) {
1238 ; SSE-LABEL: bool_reduction_v16i8:
1240 ; SSE-NEXT: pcmpgtb %xmm1, %xmm0
1241 ; SSE-NEXT: pmovmskb %xmm0, %eax
1242 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1243 ; SSE-NEXT: sete %al
1246 ; AVX1OR2-LABEL: bool_reduction_v16i8:
1248 ; AVX1OR2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
1249 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
1250 ; AVX1OR2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1251 ; AVX1OR2-NEXT: sete %al
1252 ; AVX1OR2-NEXT: retq
1254 ; AVX512-LABEL: bool_reduction_v16i8:
1256 ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
1257 ; AVX512-NEXT: kortestw %k0, %k0
1258 ; AVX512-NEXT: setb %al
1260 %a = icmp sgt <16 x i8> %x, %y
1261 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1262 %b = and <16 x i1> %s1, %a
1263 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1264 %c = and <16 x i1> %s2, %b
1265 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1266 %d = and <16 x i1> %s3, %c
1267 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1268 %e = and <16 x i1> %s4, %d
1269 %f = extractelement <16 x i1> %e, i32 0
; AND-reduction of a <4 x i1> "icmp slt" mask over 64-bit lanes of a 256-bit
; vector.
; The SSE2 expectation assembles the 64-bit signed compare from 32-bit
; pcmpgtd/pcmpeqd results recombined with shufps/andps/orps. The SSE42
; expectation uses pcmpgtq per 128-bit half and packs the two results with
; packssdw. The AVX1 expectation splits the ymm inputs with vextractf128
; and ANDs the two half results before a 128-bit vtestpd.
1273 define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
1274 ; SSE2-LABEL: bool_reduction_v4i64:
1276 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
1277 ; SSE2-NEXT: pxor %xmm4, %xmm1
1278 ; SSE2-NEXT: pxor %xmm4, %xmm3
1279 ; SSE2-NEXT: movdqa %xmm3, %xmm5
1280 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm5
1281 ; SSE2-NEXT: pxor %xmm4, %xmm0
1282 ; SSE2-NEXT: pxor %xmm4, %xmm2
1283 ; SSE2-NEXT: movdqa %xmm2, %xmm4
1284 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
1285 ; SSE2-NEXT: movdqa %xmm4, %xmm6
1286 ; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm5[0,2]
1287 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
1288 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
1289 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
1290 ; SSE2-NEXT: andps %xmm6, %xmm2
1291 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3]
1292 ; SSE2-NEXT: orps %xmm2, %xmm4
1293 ; SSE2-NEXT: movmskps %xmm4, %eax
1294 ; SSE2-NEXT: cmpl $15, %eax
1295 ; SSE2-NEXT: sete %al
1298 ; SSE42-LABEL: bool_reduction_v4i64:
1300 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm3
1301 ; SSE42-NEXT: pcmpgtq %xmm0, %xmm2
1302 ; SSE42-NEXT: packssdw %xmm3, %xmm2
1303 ; SSE42-NEXT: movmskps %xmm2, %eax
1304 ; SSE42-NEXT: cmpl $15, %eax
1305 ; SSE42-NEXT: sete %al
1308 ; AVX1-LABEL: bool_reduction_v4i64:
1310 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1311 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1312 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1313 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
1314 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
1315 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1316 ; AVX1-NEXT: vtestpd %xmm1, %xmm0
1317 ; AVX1-NEXT: setb %al
1318 ; AVX1-NEXT: vzeroupper
1321 ; AVX2-LABEL: bool_reduction_v4i64:
1323 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
1324 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
1325 ; AVX2-NEXT: vtestpd %ymm1, %ymm0
1326 ; AVX2-NEXT: setb %al
1327 ; AVX2-NEXT: vzeroupper
1330 ; AVX512-LABEL: bool_reduction_v4i64:
1332 ; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0
1333 ; AVX512-NEXT: kmovd %k0, %eax
1334 ; AVX512-NEXT: cmpb $15, %al
1335 ; AVX512-NEXT: sete %al
1336 ; AVX512-NEXT: vzeroupper
1338 %a = icmp slt <4 x i64> %x, %y
1339 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1340 %b = and <4 x i1> %s1, %a
1341 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1342 %c = and <4 x i1> %s2, %b
1343 %d = extractelement <4 x i1> %c, i32 0
; AND-reduction of an <8 x i1> "icmp ule" mask over 32-bit lanes of a
; 256-bit vector, in three halving steps.
; The SSE2 expectation computes the opposite "x > y" mask (sign-bit flip
; plus pcmpgtd), packs it to bytes and inverts the pmovmskb result with notl
; before comparing the low byte against -1. The SSE42 expectation derives
; the "ule" mask directly from pminud + pcmpeqd. The AVX1 and AVX2
; expectations xor x with min(x, y) and vptest the result for zero.
1347 define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
1348 ; SSE2-LABEL: bool_reduction_v8i32:
1350 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
1351 ; SSE2-NEXT: pxor %xmm4, %xmm3
1352 ; SSE2-NEXT: pxor %xmm4, %xmm1
1353 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm1
1354 ; SSE2-NEXT: pxor %xmm4, %xmm2
1355 ; SSE2-NEXT: pxor %xmm4, %xmm0
1356 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
1357 ; SSE2-NEXT: packssdw %xmm1, %xmm0
1358 ; SSE2-NEXT: packsswb %xmm0, %xmm0
1359 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1360 ; SSE2-NEXT: notl %eax
1361 ; SSE2-NEXT: cmpb $-1, %al
1362 ; SSE2-NEXT: sete %al
1365 ; SSE42-LABEL: bool_reduction_v8i32:
1367 ; SSE42-NEXT: pminud %xmm1, %xmm3
1368 ; SSE42-NEXT: pcmpeqd %xmm1, %xmm3
1369 ; SSE42-NEXT: pminud %xmm0, %xmm2
1370 ; SSE42-NEXT: pcmpeqd %xmm0, %xmm2
1371 ; SSE42-NEXT: packssdw %xmm3, %xmm2
1372 ; SSE42-NEXT: packsswb %xmm2, %xmm2
1373 ; SSE42-NEXT: pmovmskb %xmm2, %eax
1374 ; SSE42-NEXT: cmpb $-1, %al
1375 ; SSE42-NEXT: sete %al
1378 ; AVX1-LABEL: bool_reduction_v8i32:
1380 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1381 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1382 ; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
1383 ; AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm2
1384 ; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm1
1385 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1386 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1387 ; AVX1-NEXT: vptest %xmm0, %xmm0
1388 ; AVX1-NEXT: sete %al
1389 ; AVX1-NEXT: vzeroupper
1392 ; AVX2-LABEL: bool_reduction_v8i32:
1394 ; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1
1395 ; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
1396 ; AVX2-NEXT: vptest %ymm0, %ymm0
1397 ; AVX2-NEXT: sete %al
1398 ; AVX2-NEXT: vzeroupper
1401 ; AVX512-LABEL: bool_reduction_v8i32:
1403 ; AVX512-NEXT: vpcmpleud %ymm1, %ymm0, %k0
1404 ; AVX512-NEXT: kmovd %k0, %eax
1405 ; AVX512-NEXT: cmpb $-1, %al
1406 ; AVX512-NEXT: sete %al
1407 ; AVX512-NEXT: vzeroupper
1409 %a = icmp ule <8 x i32> %x, %y
1410 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1411 %b = and <8 x i1> %s1, %a
1412 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1413 %c = and <8 x i1> %s2, %b
1414 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1415 %d = and <8 x i1> %s3, %c
1416 %e = extractelement <8 x i1> %d, i32 0
; AND-reduction of a <16 x i1> "icmp eq" mask over 16-bit lanes of a
; 256-bit vector, i.e. a whole-vector equality test.
; The SSE2 expectation compares at byte granularity (pcmpeqb), ANDs the two
; halves and checks the 16-bit pmovmskb result against 0xFFFF via xorl.
; Every other expectation xors the inputs and uses ptest/vptest to check
; that the difference is zero.
1420 define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
1421 ; SSE2-LABEL: bool_reduction_v16i16:
1423 ; SSE2-NEXT: pcmpeqb %xmm3, %xmm1
1424 ; SSE2-NEXT: pcmpeqb %xmm2, %xmm0
1425 ; SSE2-NEXT: pand %xmm1, %xmm0
1426 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1427 ; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
1428 ; SSE2-NEXT: sete %al
1431 ; SSE42-LABEL: bool_reduction_v16i16:
1433 ; SSE42-NEXT: pxor %xmm3, %xmm1
1434 ; SSE42-NEXT: pxor %xmm2, %xmm0
1435 ; SSE42-NEXT: por %xmm1, %xmm0
1436 ; SSE42-NEXT: ptest %xmm0, %xmm0
1437 ; SSE42-NEXT: sete %al
1440 ; AVX1-LABEL: bool_reduction_v16i16:
1442 ; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
1443 ; AVX1-NEXT: vptest %ymm0, %ymm0
1444 ; AVX1-NEXT: sete %al
1445 ; AVX1-NEXT: vzeroupper
1448 ; AVX2-LABEL: bool_reduction_v16i16:
1450 ; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
1451 ; AVX2-NEXT: vptest %ymm0, %ymm0
1452 ; AVX2-NEXT: sete %al
1453 ; AVX2-NEXT: vzeroupper
1456 ; AVX512-LABEL: bool_reduction_v16i16:
1458 ; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
1459 ; AVX512-NEXT: vptest %ymm0, %ymm0
1460 ; AVX512-NEXT: sete %al
1461 ; AVX512-NEXT: vzeroupper
1463 %a = icmp eq <16 x i16> %x, %y
1464 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1465 %b = and <16 x i1> %s1, %a
1466 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1467 %c = and <16 x i1> %s2, %b
1468 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1469 %d = and <16 x i1> %s3, %c
1470 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1471 %e = and <16 x i1> %s4, %d
1472 %f = extractelement <16 x i1> %e, i32 0
1476 define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
1477 ; SSE2-LABEL: bool_reduction_v32i8:
1479 ; SSE2-NEXT: pcmpeqb %xmm3, %xmm1
1480 ; SSE2-NEXT: pcmpeqb %xmm2, %xmm0
1481 ; SSE2-NEXT: pand %xmm1, %xmm0
1482 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1483 ; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
1484 ; SSE2-NEXT: sete %al
1487 ; SSE42-LABEL: bool_reduction_v32i8:
1489 ; SSE42-NEXT: pxor %xmm3, %xmm1
1490 ; SSE42-NEXT: pxor %xmm2, %xmm0
1491 ; SSE42-NEXT: por %xmm1, %xmm0
1492 ; SSE42-NEXT: ptest %xmm0, %xmm0
1493 ; SSE42-NEXT: sete %al
1496 ; AVX1-LABEL: bool_reduction_v32i8:
1498 ; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
1499 ; AVX1-NEXT: vptest %ymm0, %ymm0
1500 ; AVX1-NEXT: sete %al
1501 ; AVX1-NEXT: vzeroupper
1504 ; AVX2-LABEL: bool_reduction_v32i8:
1506 ; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
1507 ; AVX2-NEXT: vptest %ymm0, %ymm0
1508 ; AVX2-NEXT: sete %al
1509 ; AVX2-NEXT: vzeroupper
1512 ; AVX512-LABEL: bool_reduction_v32i8:
1514 ; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
1515 ; AVX512-NEXT: vptest %ymm0, %ymm0
1516 ; AVX512-NEXT: sete %al
1517 ; AVX512-NEXT: vzeroupper
1519 %a = icmp eq <32 x i8> %x, %y
1520 %s1 = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1521 %b = and <32 x i1> %s1, %a
1522 %s2 = shufflevector <32 x i1> %b, <32 x i1> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1523 %c = and <32 x i1> %s2, %b
1524 %s3 = shufflevector <32 x i1> %c, <32 x i1> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1525 %d = and <32 x i1> %s3, %c
1526 %s4 = shufflevector <32 x i1> %d, <32 x i1> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1527 %e = and <32 x i1> %s4, %d
1528 %s5 = shufflevector <32 x i1> %e, <32 x i1> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1529 %f = and <32 x i1> %s5, %e
1530 %g = extractelement <32 x i1> %f, i32 0
; Loads two <2 x i8> vectors (align 1) from %s0 and %s1, compares them for
; equality, and combines the two lane results with
; "select %cmp0, %cmp1, false" rather than a plain "and".
; Every expectation loads each 2-byte vector with a single movzwl. The SSE
; and AVX1OR2 sequences sign-extend the byte mask to 64-bit lanes and test
; both lanes; the AVX512 sequence inverts the k-mask and tests its low two
; bits for zero.
1535 define i1 @select_v2i8(ptr %s0, ptr %s1) {
1536 ; SSE2-LABEL: select_v2i8:
1538 ; SSE2-NEXT: movzwl (%rdi), %eax
1539 ; SSE2-NEXT: movd %eax, %xmm0
1540 ; SSE2-NEXT: movzwl (%rsi), %eax
1541 ; SSE2-NEXT: movd %eax, %xmm1
1542 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
1543 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1544 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
1545 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1546 ; SSE2-NEXT: movmskpd %xmm0, %eax
1547 ; SSE2-NEXT: cmpl $3, %eax
1548 ; SSE2-NEXT: sete %al
1551 ; SSE42-LABEL: select_v2i8:
1553 ; SSE42-NEXT: movzwl (%rdi), %eax
1554 ; SSE42-NEXT: movd %eax, %xmm0
1555 ; SSE42-NEXT: movzwl (%rsi), %eax
1556 ; SSE42-NEXT: movd %eax, %xmm1
1557 ; SSE42-NEXT: pcmpeqb %xmm0, %xmm1
1558 ; SSE42-NEXT: pmovsxbq %xmm1, %xmm0
1559 ; SSE42-NEXT: movmskpd %xmm0, %eax
1560 ; SSE42-NEXT: cmpl $3, %eax
1561 ; SSE42-NEXT: sete %al
1564 ; AVX1OR2-LABEL: select_v2i8:
1566 ; AVX1OR2-NEXT: movzwl (%rdi), %eax
1567 ; AVX1OR2-NEXT: vmovd %eax, %xmm0
1568 ; AVX1OR2-NEXT: movzwl (%rsi), %eax
1569 ; AVX1OR2-NEXT: vmovd %eax, %xmm1
1570 ; AVX1OR2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1571 ; AVX1OR2-NEXT: vpmovsxbq %xmm0, %xmm0
1572 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1573 ; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0
1574 ; AVX1OR2-NEXT: setb %al
1575 ; AVX1OR2-NEXT: retq
1577 ; AVX512-LABEL: select_v2i8:
1579 ; AVX512-NEXT: movzwl (%rdi), %eax
1580 ; AVX512-NEXT: vmovd %eax, %xmm0
1581 ; AVX512-NEXT: movzwl (%rsi), %eax
1582 ; AVX512-NEXT: vmovd %eax, %xmm1
1583 ; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
1584 ; AVX512-NEXT: knotw %k0, %k0
1585 ; AVX512-NEXT: kmovd %k0, %eax
1586 ; AVX512-NEXT: testb $3, %al
1587 ; AVX512-NEXT: sete %al
1589 %v0 = load <2 x i8>, ptr %s0, align 1
1590 %v1 = load <2 x i8>, ptr %s1, align 1
1591 %cmp = icmp eq <2 x i8> %v0, %v1
1592 %cmp0 = extractelement <2 x i1> %cmp, i32 0
1593 %cmp1 = extractelement <2 x i1> %cmp, i32 1
1594 %res = select i1 %cmp0, i1 %cmp1, i1 false