1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512
; Any-of reduction over a sign-extended <2 x double> compare: fcmp ogt gives a
; <2 x i1> mask, which is sign-extended to <2 x i64>, OR-ed with a shuffle that
; moves lane 1 into lane 0, and lane 0 is then extracted.
7 define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
9 ; SSE-LABEL: test_v2f64_sext:
11 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
12 ; SSE-NEXT: movmskpd %xmm1, %ecx
13 ; SSE-NEXT: xorl %eax, %eax
14 ; SSE-NEXT: negl %ecx
15 ; SSE-NEXT: sbbq %rax, %rax
18 ; AVX-LABEL: test_v2f64_sext:
20 ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
21 ; AVX-NEXT: xorl %eax, %eax
22 ; AVX-NEXT: vtestpd %xmm0, %xmm0
23 ; AVX-NEXT: setne %al
24 ; AVX-NEXT: negq %rax
26 %c = fcmp ogt <2 x double> %a0, %a1
27 %s = sext <2 x i1> %c to <2 x i64>
28 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
29 %2 = or <2 x i64> %s, %1
30 %3 = extractelement <2 x i64> %2, i32 0
; Any-of reduction over a sign-extended <4 x double> compare: the fcmp ogt mask
; is sign-extended to <4 x i64>, folded with two shuffle+or steps (upper half
; onto lower half, then lane 1 onto lane 0), and lane 0 is extracted.
34 define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
35 ; SSE-LABEL: test_v4f64_sext:
37 ; SSE-NEXT: cmpltpd %xmm1, %xmm3
38 ; SSE-NEXT: cmpltpd %xmm0, %xmm2
39 ; SSE-NEXT: orpd %xmm3, %xmm2
40 ; SSE-NEXT: movmskpd %xmm2, %ecx
41 ; SSE-NEXT: xorl %eax, %eax
42 ; SSE-NEXT: negl %ecx
43 ; SSE-NEXT: sbbq %rax, %rax
46 ; AVX-LABEL: test_v4f64_sext:
48 ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
49 ; AVX-NEXT: xorl %eax, %eax
50 ; AVX-NEXT: vtestpd %ymm0, %ymm0
51 ; AVX-NEXT: setne %al
52 ; AVX-NEXT: negq %rax
53 ; AVX-NEXT: vzeroupper
55 %c = fcmp ogt <4 x double> %a0, %a1
56 %s = sext <4 x i1> %c to <4 x i64>
57 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
58 %2 = or <4 x i64> %s, %1
59 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
60 %4 = or <4 x i64> %2, %3
61 %5 = extractelement <4 x i64> %4, i64 0
; Variant of the <4 x double> any-of reduction where the fcmp ogt mask is
; sign-extended to <4 x i32> (not <4 x i64>). The shuffle+or ladder runs on
; <4 x i32>, lane 0 is extracted, and the scalar is sign-extended to i64.
65 define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
66 ; SSE-LABEL: test_v4f64_legal_sext:
68 ; SSE-NEXT: cmpltpd %xmm1, %xmm3
69 ; SSE-NEXT: cmpltpd %xmm0, %xmm2
70 ; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
71 ; SSE-NEXT: movmskps %xmm2, %ecx
72 ; SSE-NEXT: xorl %eax, %eax
73 ; SSE-NEXT: negl %ecx
74 ; SSE-NEXT: sbbq %rax, %rax
77 ; AVX1OR2-LABEL: test_v4f64_legal_sext:
79 ; AVX1OR2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
80 ; AVX1OR2-NEXT: vextractf128 $1, %ymm0, %xmm1
81 ; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
82 ; AVX1OR2-NEXT: xorl %eax, %eax
83 ; AVX1OR2-NEXT: vtestps %xmm0, %xmm0
84 ; AVX1OR2-NEXT: setne %al
85 ; AVX1OR2-NEXT: negq %rax
86 ; AVX1OR2-NEXT: vzeroupper
89 ; AVX512-LABEL: test_v4f64_legal_sext:
91 ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
92 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
93 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
94 ; AVX512-NEXT: xorl %eax, %eax
95 ; AVX512-NEXT: vtestps %xmm0, %xmm0
96 ; AVX512-NEXT: setne %al
97 ; AVX512-NEXT: negq %rax
98 ; AVX512-NEXT: vzeroupper
100 %c = fcmp ogt <4 x double> %a0, %a1
101 %s = sext <4 x i1> %c to <4 x i32>
102 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
103 %2 = or <4 x i32> %s, %1
104 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
105 %4 = or <4 x i32> %2, %3
106 %5 = extractelement <4 x i32> %4, i64 0
107 %6 = sext i32 %5 to i64
; Any-of reduction over a sign-extended <4 x float> compare: the fcmp ogt mask
; is sign-extended to <4 x i32>, folded with two shuffle+or steps, and lane 0
; is extracted.
111 define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
112 ; SSE-LABEL: test_v4f32_sext:
114 ; SSE-NEXT: cmpltps %xmm0, %xmm1
115 ; SSE-NEXT: movmskps %xmm1, %ecx
116 ; SSE-NEXT: xorl %eax, %eax
117 ; SSE-NEXT: negl %ecx
118 ; SSE-NEXT: sbbl %eax, %eax
121 ; AVX-LABEL: test_v4f32_sext:
123 ; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
124 ; AVX-NEXT: xorl %eax, %eax
125 ; AVX-NEXT: vtestps %xmm0, %xmm0
126 ; AVX-NEXT: setne %al
127 ; AVX-NEXT: negl %eax
129 %c = fcmp ogt <4 x float> %a0, %a1
130 %s = sext <4 x i1> %c to <4 x i32>
131 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
132 %2 = or <4 x i32> %s, %1
133 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
134 %4 = or <4 x i32> %2, %3
135 %5 = extractelement <4 x i32> %4, i32 0
; Any-of reduction over a sign-extended <8 x float> compare: the fcmp ogt mask
; is sign-extended to <8 x i32>, folded with three shuffle+or steps (halving
; the live lanes each time), and lane 0 is extracted.
139 define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
140 ; SSE-LABEL: test_v8f32_sext:
142 ; SSE-NEXT: cmpltps %xmm1, %xmm3
143 ; SSE-NEXT: cmpltps %xmm0, %xmm2
144 ; SSE-NEXT: orps %xmm3, %xmm2
145 ; SSE-NEXT: movmskps %xmm2, %ecx
146 ; SSE-NEXT: xorl %eax, %eax
147 ; SSE-NEXT: negl %ecx
148 ; SSE-NEXT: sbbl %eax, %eax
151 ; AVX-LABEL: test_v8f32_sext:
153 ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
154 ; AVX-NEXT: xorl %eax, %eax
155 ; AVX-NEXT: vtestps %ymm0, %ymm0
156 ; AVX-NEXT: setne %al
157 ; AVX-NEXT: negl %eax
158 ; AVX-NEXT: vzeroupper
160 %c = fcmp ogt <8 x float> %a0, %a1
161 %s = sext <8 x i1> %c to <8 x i32>
162 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
163 %2 = or <8 x i32> %s, %1
164 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
165 %4 = or <8 x i32> %2, %3
166 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
167 %6 = or <8 x i32> %4, %5
168 %7 = extractelement <8 x i32> %6, i32 0
; Variant of the <8 x float> any-of reduction where the fcmp ogt mask is
; sign-extended to <8 x i16> (not <8 x i32>). The shuffle+or ladder runs on
; <8 x i16>, lane 0 is extracted, and the scalar is sign-extended to i32.
172 define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
173 ; SSE-LABEL: test_v8f32_legal_sext:
175 ; SSE-NEXT: cmpltps %xmm1, %xmm3
176 ; SSE-NEXT: cmpltps %xmm0, %xmm2
177 ; SSE-NEXT: packssdw %xmm3, %xmm2
178 ; SSE-NEXT: pmovmskb %xmm2, %ecx
179 ; SSE-NEXT: xorl %eax, %eax
180 ; SSE-NEXT: negl %ecx
181 ; SSE-NEXT: sbbl %eax, %eax
184 ; AVX1OR2-LABEL: test_v8f32_legal_sext:
186 ; AVX1OR2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
187 ; AVX1OR2-NEXT: vextractf128 $1, %ymm0, %xmm1
188 ; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
189 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %ecx
190 ; AVX1OR2-NEXT: xorl %eax, %eax
191 ; AVX1OR2-NEXT: negl %ecx
192 ; AVX1OR2-NEXT: sbbl %eax, %eax
193 ; AVX1OR2-NEXT: vzeroupper
196 ; AVX512-LABEL: test_v8f32_legal_sext:
198 ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0
199 ; AVX512-NEXT: vpmovm2w %k0, %xmm0
200 ; AVX512-NEXT: vpmovmskb %xmm0, %ecx
201 ; AVX512-NEXT: xorl %eax, %eax
202 ; AVX512-NEXT: negl %ecx
203 ; AVX512-NEXT: sbbl %eax, %eax
204 ; AVX512-NEXT: vzeroupper
206 %c = fcmp ogt <8 x float> %a0, %a1
207 %s = sext <8 x i1> %c to <8 x i16>
208 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
209 %2 = or <8 x i16> %s, %1
210 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
211 %4 = or <8 x i16> %2, %3
212 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
213 %6 = or <8 x i16> %4, %5
214 %7 = extractelement <8 x i16> %6, i32 0
215 %8 = sext i16 %7 to i32
; Any-of reduction over a sign-extended <2 x i64> compare: the icmp sgt mask is
; sign-extended to <2 x i64>, OR-ed with a shuffle that moves lane 1 into
; lane 0, and lane 0 is extracted. SSE2 and SSE4.2 have separate check blocks
; (only the SSE42 block uses pcmpgtq).
219 define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
220 ; SSE2-LABEL: test_v2i64_sext:
222 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
223 ; SSE2-NEXT: pxor %xmm2, %xmm1
224 ; SSE2-NEXT: pxor %xmm2, %xmm0
225 ; SSE2-NEXT: movdqa %xmm0, %xmm2
226 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm2
227 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
228 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
229 ; SSE2-NEXT: pand %xmm2, %xmm1
230 ; SSE2-NEXT: por %xmm0, %xmm1
231 ; SSE2-NEXT: movmskpd %xmm1, %ecx
232 ; SSE2-NEXT: xorl %eax, %eax
233 ; SSE2-NEXT: negl %ecx
234 ; SSE2-NEXT: sbbq %rax, %rax
237 ; SSE42-LABEL: test_v2i64_sext:
239 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
240 ; SSE42-NEXT: movmskpd %xmm0, %ecx
241 ; SSE42-NEXT: xorl %eax, %eax
242 ; SSE42-NEXT: negl %ecx
243 ; SSE42-NEXT: sbbq %rax, %rax
246 ; AVX-LABEL: test_v2i64_sext:
248 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
249 ; AVX-NEXT: xorl %eax, %eax
250 ; AVX-NEXT: vtestpd %xmm0, %xmm0
251 ; AVX-NEXT: setne %al
252 ; AVX-NEXT: negq %rax
254 %c = icmp sgt <2 x i64> %a0, %a1
255 %s = sext <2 x i1> %c to <2 x i64>
256 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
257 %2 = or <2 x i64> %s, %1
258 %3 = extractelement <2 x i64> %2, i32 0
; Any-of reduction over a sign-extended <4 x i64> compare: the icmp sgt mask is
; sign-extended to <4 x i64>, folded with two shuffle+or steps, and lane 0 is
; extracted. Each of SSE2, SSE42, AVX1, AVX2 and AVX512 has its own check block.
262 define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
263 ; SSE2-LABEL: test_v4i64_sext:
265 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
266 ; SSE2-NEXT: pxor %xmm4, %xmm3
267 ; SSE2-NEXT: pxor %xmm4, %xmm1
268 ; SSE2-NEXT: movdqa %xmm1, %xmm5
269 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm5
270 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm1
271 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
272 ; SSE2-NEXT: pand %xmm5, %xmm3
273 ; SSE2-NEXT: por %xmm1, %xmm3
274 ; SSE2-NEXT: pxor %xmm4, %xmm2
275 ; SSE2-NEXT: pxor %xmm4, %xmm0
276 ; SSE2-NEXT: movdqa %xmm0, %xmm1
277 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
278 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
279 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
280 ; SSE2-NEXT: pand %xmm1, %xmm2
281 ; SSE2-NEXT: por %xmm0, %xmm2
282 ; SSE2-NEXT: por %xmm3, %xmm2
283 ; SSE2-NEXT: movmskpd %xmm2, %ecx
284 ; SSE2-NEXT: xorl %eax, %eax
285 ; SSE2-NEXT: negl %ecx
286 ; SSE2-NEXT: sbbq %rax, %rax
289 ; SSE42-LABEL: test_v4i64_sext:
291 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm1
292 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
293 ; SSE42-NEXT: por %xmm1, %xmm0
294 ; SSE42-NEXT: movmskpd %xmm0, %ecx
295 ; SSE42-NEXT: xorl %eax, %eax
296 ; SSE42-NEXT: negl %ecx
297 ; SSE42-NEXT: sbbq %rax, %rax
300 ; AVX1-LABEL: test_v4i64_sext:
302 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
303 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
304 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
305 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
306 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
307 ; AVX1-NEXT: xorl %eax, %eax
308 ; AVX1-NEXT: vtestpd %xmm0, %xmm0
309 ; AVX1-NEXT: setne %al
310 ; AVX1-NEXT: negq %rax
311 ; AVX1-NEXT: vzeroupper
314 ; AVX2-LABEL: test_v4i64_sext:
316 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
317 ; AVX2-NEXT: xorl %eax, %eax
318 ; AVX2-NEXT: vtestpd %ymm0, %ymm0
319 ; AVX2-NEXT: setne %al
320 ; AVX2-NEXT: negq %rax
321 ; AVX2-NEXT: vzeroupper
324 ; AVX512-LABEL: test_v4i64_sext:
326 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
327 ; AVX512-NEXT: xorl %eax, %eax
328 ; AVX512-NEXT: vtestpd %ymm0, %ymm0
329 ; AVX512-NEXT: setne %al
330 ; AVX512-NEXT: negq %rax
331 ; AVX512-NEXT: vzeroupper
333 %c = icmp sgt <4 x i64> %a0, %a1
334 %s = sext <4 x i1> %c to <4 x i64>
335 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
336 %2 = or <4 x i64> %s, %1
337 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
338 %4 = or <4 x i64> %2, %3
339 %5 = extractelement <4 x i64> %4, i64 0
; Variant of the <4 x i64> any-of reduction where the icmp sgt mask is
; sign-extended to <4 x i32> (not <4 x i64>). The shuffle+or ladder runs on
; <4 x i32>, lane 0 is extracted, and the scalar is sign-extended to i64.
343 define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
344 ; SSE2-LABEL: test_v4i64_legal_sext:
346 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
347 ; SSE2-NEXT: pxor %xmm4, %xmm3
348 ; SSE2-NEXT: pxor %xmm4, %xmm1
349 ; SSE2-NEXT: movdqa %xmm1, %xmm5
350 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
351 ; SSE2-NEXT: pxor %xmm4, %xmm2
352 ; SSE2-NEXT: pxor %xmm4, %xmm0
353 ; SSE2-NEXT: movdqa %xmm0, %xmm4
354 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
355 ; SSE2-NEXT: movdqa %xmm4, %xmm6
356 ; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm5[0,2]
357 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm1
358 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
359 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
360 ; SSE2-NEXT: andps %xmm6, %xmm0
361 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3]
362 ; SSE2-NEXT: orps %xmm0, %xmm4
363 ; SSE2-NEXT: movmskps %xmm4, %ecx
364 ; SSE2-NEXT: xorl %eax, %eax
365 ; SSE2-NEXT: negl %ecx
366 ; SSE2-NEXT: sbbq %rax, %rax
369 ; SSE42-LABEL: test_v4i64_legal_sext:
371 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm1
372 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
373 ; SSE42-NEXT: packssdw %xmm1, %xmm0
374 ; SSE42-NEXT: movmskps %xmm0, %ecx
375 ; SSE42-NEXT: xorl %eax, %eax
376 ; SSE42-NEXT: negl %ecx
377 ; SSE42-NEXT: sbbq %rax, %rax
380 ; AVX1-LABEL: test_v4i64_legal_sext:
382 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
383 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
384 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
385 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
386 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
387 ; AVX1-NEXT: xorl %eax, %eax
388 ; AVX1-NEXT: vtestps %xmm0, %xmm0
389 ; AVX1-NEXT: setne %al
390 ; AVX1-NEXT: negq %rax
391 ; AVX1-NEXT: vzeroupper
394 ; AVX2-LABEL: test_v4i64_legal_sext:
396 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
397 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
398 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
399 ; AVX2-NEXT: xorl %eax, %eax
400 ; AVX2-NEXT: vtestps %xmm0, %xmm0
401 ; AVX2-NEXT: setne %al
402 ; AVX2-NEXT: negq %rax
403 ; AVX2-NEXT: vzeroupper
406 ; AVX512-LABEL: test_v4i64_legal_sext:
408 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
409 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
410 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
411 ; AVX512-NEXT: xorl %eax, %eax
412 ; AVX512-NEXT: vtestps %xmm0, %xmm0
413 ; AVX512-NEXT: setne %al
414 ; AVX512-NEXT: negq %rax
415 ; AVX512-NEXT: vzeroupper
417 %c = icmp sgt <4 x i64> %a0, %a1
418 %s = sext <4 x i1> %c to <4 x i32>
419 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
420 %2 = or <4 x i32> %s, %1
421 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
422 %4 = or <4 x i32> %2, %3
423 %5 = extractelement <4 x i32> %4, i64 0
424 %6 = sext i32 %5 to i64
; Any-of reduction over a sign-extended <4 x i32> compare: the icmp sgt mask is
; sign-extended to <4 x i32>, folded with two shuffle+or steps, and lane 0 is
; extracted.
428 define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
429 ; SSE-LABEL: test_v4i32_sext:
431 ; SSE-NEXT: pcmpgtd %xmm1, %xmm0
432 ; SSE-NEXT: movmskps %xmm0, %ecx
433 ; SSE-NEXT: xorl %eax, %eax
434 ; SSE-NEXT: negl %ecx
435 ; SSE-NEXT: sbbl %eax, %eax
438 ; AVX-LABEL: test_v4i32_sext:
440 ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
441 ; AVX-NEXT: xorl %eax, %eax
442 ; AVX-NEXT: vtestps %xmm0, %xmm0
443 ; AVX-NEXT: setne %al
444 ; AVX-NEXT: negl %eax
446 %c = icmp sgt <4 x i32> %a0, %a1
447 %s = sext <4 x i1> %c to <4 x i32>
448 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
449 %2 = or <4 x i32> %s, %1
450 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
451 %4 = or <4 x i32> %2, %3
452 %5 = extractelement <4 x i32> %4, i32 0
; Any-of reduction over a sign-extended <8 x i32> compare: the icmp sgt mask is
; sign-extended to <8 x i32>, folded with three shuffle+or steps, and lane 0 is
; extracted. AVX1, AVX2 and AVX512 each have their own check block.
456 define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
457 ; SSE-LABEL: test_v8i32_sext:
459 ; SSE-NEXT: pcmpgtd %xmm3, %xmm1
460 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0
461 ; SSE-NEXT: por %xmm1, %xmm0
462 ; SSE-NEXT: movmskps %xmm0, %ecx
463 ; SSE-NEXT: xorl %eax, %eax
464 ; SSE-NEXT: negl %ecx
465 ; SSE-NEXT: sbbl %eax, %eax
468 ; AVX1-LABEL: test_v8i32_sext:
470 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
471 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
472 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
473 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
474 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
475 ; AVX1-NEXT: xorl %eax, %eax
476 ; AVX1-NEXT: vtestps %xmm0, %xmm0
477 ; AVX1-NEXT: setne %al
478 ; AVX1-NEXT: negl %eax
479 ; AVX1-NEXT: vzeroupper
482 ; AVX2-LABEL: test_v8i32_sext:
484 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
485 ; AVX2-NEXT: xorl %eax, %eax
486 ; AVX2-NEXT: vtestps %ymm0, %ymm0
487 ; AVX2-NEXT: setne %al
488 ; AVX2-NEXT: negl %eax
489 ; AVX2-NEXT: vzeroupper
492 ; AVX512-LABEL: test_v8i32_sext:
494 ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
495 ; AVX512-NEXT: xorl %eax, %eax
496 ; AVX512-NEXT: vtestps %ymm0, %ymm0
497 ; AVX512-NEXT: setne %al
498 ; AVX512-NEXT: negl %eax
499 ; AVX512-NEXT: vzeroupper
501 %c = icmp sgt <8 x i32> %a0, %a1
502 %s = sext <8 x i1> %c to <8 x i32>
503 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
504 %2 = or <8 x i32> %s, %1
505 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
506 %4 = or <8 x i32> %2, %3
507 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
508 %6 = or <8 x i32> %4, %5
509 %7 = extractelement <8 x i32> %6, i32 0
; Variant of the <8 x i32> any-of reduction where the icmp sgt mask is
; sign-extended to <8 x i16> (not <8 x i32>). The shuffle+or ladder runs on
; <8 x i16>, lane 0 is extracted, and the scalar is sign-extended to i32.
513 define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
514 ; SSE-LABEL: test_v8i32_legal_sext:
516 ; SSE-NEXT: pcmpgtd %xmm3, %xmm1
517 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0
518 ; SSE-NEXT: packssdw %xmm1, %xmm0
519 ; SSE-NEXT: pmovmskb %xmm0, %ecx
520 ; SSE-NEXT: xorl %eax, %eax
521 ; SSE-NEXT: negl %ecx
522 ; SSE-NEXT: sbbl %eax, %eax
525 ; AVX1-LABEL: test_v8i32_legal_sext:
527 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
528 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
529 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
530 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
531 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
532 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
533 ; AVX1-NEXT: xorl %eax, %eax
534 ; AVX1-NEXT: negl %ecx
535 ; AVX1-NEXT: sbbl %eax, %eax
536 ; AVX1-NEXT: vzeroupper
539 ; AVX2-LABEL: test_v8i32_legal_sext:
541 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
542 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
543 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
544 ; AVX2-NEXT: vpmovmskb %xmm0, %ecx
545 ; AVX2-NEXT: xorl %eax, %eax
546 ; AVX2-NEXT: negl %ecx
547 ; AVX2-NEXT: sbbl %eax, %eax
548 ; AVX2-NEXT: vzeroupper
551 ; AVX512-LABEL: test_v8i32_legal_sext:
553 ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
554 ; AVX512-NEXT: vpmovm2w %k0, %xmm0
555 ; AVX512-NEXT: vpmovmskb %xmm0, %ecx
556 ; AVX512-NEXT: xorl %eax, %eax
557 ; AVX512-NEXT: negl %ecx
558 ; AVX512-NEXT: sbbl %eax, %eax
559 ; AVX512-NEXT: vzeroupper
561 %c = icmp sgt <8 x i32> %a0, %a1
562 %s = sext <8 x i1> %c to <8 x i16>
563 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
564 %2 = or <8 x i16> %s, %1
565 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
566 %4 = or <8 x i16> %2, %3
567 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
568 %6 = or <8 x i16> %4, %5
569 %7 = extractelement <8 x i16> %6, i32 0
570 %8 = sext i16 %7 to i32
; Any-of reduction over a sign-extended <8 x i16> compare: the icmp sgt mask is
; sign-extended to <8 x i16>, folded with three shuffle+or steps, and lane 0 is
; extracted.
574 define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
575 ; SSE-LABEL: test_v8i16_sext:
577 ; SSE-NEXT: pcmpgtw %xmm1, %xmm0
578 ; SSE-NEXT: pmovmskb %xmm0, %ecx
579 ; SSE-NEXT: xorl %eax, %eax
580 ; SSE-NEXT: negl %ecx
581 ; SSE-NEXT: sbbl %eax, %eax
582 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
585 ; AVX-LABEL: test_v8i16_sext:
587 ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
588 ; AVX-NEXT: vpmovmskb %xmm0, %ecx
589 ; AVX-NEXT: xorl %eax, %eax
590 ; AVX-NEXT: negl %ecx
591 ; AVX-NEXT: sbbl %eax, %eax
592 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
594 %c = icmp sgt <8 x i16> %a0, %a1
595 %s = sext <8 x i1> %c to <8 x i16>
596 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
597 %2 = or <8 x i16> %s, %1
598 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
599 %4 = or <8 x i16> %2, %3
600 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
601 %6 = or <8 x i16> %4, %5
602 %7 = extractelement <8 x i16> %6, i32 0
; Any-of reduction over a sign-extended <16 x i16> compare: the icmp sgt mask
; is sign-extended to <16 x i16>, folded with four shuffle+or steps, and lane 0
; is extracted.
606 define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
607 ; SSE-LABEL: test_v16i16_sext:
609 ; SSE-NEXT: pcmpgtw %xmm3, %xmm1
610 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0
611 ; SSE-NEXT: por %xmm1, %xmm0
612 ; SSE-NEXT: pmovmskb %xmm0, %ecx
613 ; SSE-NEXT: xorl %eax, %eax
614 ; SSE-NEXT: negl %ecx
615 ; SSE-NEXT: sbbl %eax, %eax
616 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
619 ; AVX1-LABEL: test_v16i16_sext:
621 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
622 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
623 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
624 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
625 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
626 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
627 ; AVX1-NEXT: xorl %eax, %eax
628 ; AVX1-NEXT: negl %ecx
629 ; AVX1-NEXT: sbbl %eax, %eax
630 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
631 ; AVX1-NEXT: vzeroupper
634 ; AVX2-LABEL: test_v16i16_sext:
636 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
637 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
638 ; AVX2-NEXT: xorl %eax, %eax
639 ; AVX2-NEXT: negl %ecx
640 ; AVX2-NEXT: sbbl %eax, %eax
641 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
642 ; AVX2-NEXT: vzeroupper
645 ; AVX512-LABEL: test_v16i16_sext:
647 ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
648 ; AVX512-NEXT: vpmovmskb %ymm0, %ecx
649 ; AVX512-NEXT: xorl %eax, %eax
650 ; AVX512-NEXT: negl %ecx
651 ; AVX512-NEXT: sbbl %eax, %eax
652 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
653 ; AVX512-NEXT: vzeroupper
655 %c = icmp sgt <16 x i16> %a0, %a1
656 %s = sext <16 x i1> %c to <16 x i16>
657 %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
658 %2 = or <16 x i16> %s, %1
659 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
660 %4 = or <16 x i16> %2, %3
661 %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
662 %6 = or <16 x i16> %4, %5
663 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
664 %8 = or <16 x i16> %6, %7
665 %9 = extractelement <16 x i16> %8, i32 0
; Variant of the <16 x i16> any-of reduction where the icmp sgt mask is
; sign-extended to <16 x i8> (not <16 x i16>). The shuffle+or ladder runs on
; <16 x i8>, lane 0 is extracted, and the scalar is sign-extended to i16.
669 define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
670 ; SSE-LABEL: test_v16i16_legal_sext:
672 ; SSE-NEXT: pcmpgtw %xmm3, %xmm1
673 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0
674 ; SSE-NEXT: packsswb %xmm1, %xmm0
675 ; SSE-NEXT: pmovmskb %xmm0, %ecx
676 ; SSE-NEXT: xorl %eax, %eax
677 ; SSE-NEXT: negl %ecx
678 ; SSE-NEXT: sbbl %eax, %eax
679 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
682 ; AVX1-LABEL: test_v16i16_legal_sext:
684 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
685 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
686 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
687 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
688 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
689 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
690 ; AVX1-NEXT: xorl %eax, %eax
691 ; AVX1-NEXT: negl %ecx
692 ; AVX1-NEXT: sbbl %eax, %eax
693 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
694 ; AVX1-NEXT: vzeroupper
697 ; AVX2-LABEL: test_v16i16_legal_sext:
699 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
700 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
701 ; AVX2-NEXT: xorl %eax, %eax
702 ; AVX2-NEXT: negl %ecx
703 ; AVX2-NEXT: sbbl %eax, %eax
704 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
705 ; AVX2-NEXT: vzeroupper
708 ; AVX512-LABEL: test_v16i16_legal_sext:
710 ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
711 ; AVX512-NEXT: vpmovm2b %k0, %xmm0
712 ; AVX512-NEXT: vpmovmskb %xmm0, %ecx
713 ; AVX512-NEXT: xorl %eax, %eax
714 ; AVX512-NEXT: negl %ecx
715 ; AVX512-NEXT: sbbl %eax, %eax
716 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
717 ; AVX512-NEXT: vzeroupper
719 %c = icmp sgt <16 x i16> %a0, %a1
720 %s = sext <16 x i1> %c to <16 x i8>
721 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
722 %2 = or <16 x i8> %s, %1
723 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
724 %4 = or <16 x i8> %2, %3
725 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
726 %6 = or <16 x i8> %4, %5
727 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
728 %8 = or <16 x i8> %6, %7
729 %9 = extractelement <16 x i8> %8, i32 0
730 %10 = sext i8 %9 to i16
; Any-of reduction over a sign-extended <16 x i8> compare: the icmp sgt mask is
; sign-extended to <16 x i8>, folded with four shuffle+or steps, and lane 0 is
; extracted.
734 define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
735 ; SSE-LABEL: test_v16i8_sext:
737 ; SSE-NEXT: pcmpgtb %xmm1, %xmm0
738 ; SSE-NEXT: pmovmskb %xmm0, %ecx
739 ; SSE-NEXT: xorl %eax, %eax
740 ; SSE-NEXT: negl %ecx
741 ; SSE-NEXT: sbbl %eax, %eax
742 ; SSE-NEXT: # kill: def $al killed $al killed $eax
745 ; AVX-LABEL: test_v16i8_sext:
747 ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
748 ; AVX-NEXT: vpmovmskb %xmm0, %ecx
749 ; AVX-NEXT: xorl %eax, %eax
750 ; AVX-NEXT: negl %ecx
751 ; AVX-NEXT: sbbl %eax, %eax
752 ; AVX-NEXT: # kill: def $al killed $al killed $eax
754 %c = icmp sgt <16 x i8> %a0, %a1
755 %s = sext <16 x i1> %c to <16 x i8>
756 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
757 %2 = or <16 x i8> %s, %1
758 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
759 %4 = or <16 x i8> %2, %3
760 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
761 %6 = or <16 x i8> %4, %5
762 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
763 %8 = or <16 x i8> %6, %7
764 %9 = extractelement <16 x i8> %8, i32 0
768 define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
769 ; SSE-LABEL: test_v32i8_sext:
771 ; SSE-NEXT: pcmpgtb %xmm3, %xmm1
772 ; SSE-NEXT: pcmpgtb %xmm2, %xmm0
773 ; SSE-NEXT: por %xmm1, %xmm0
774 ; SSE-NEXT: pmovmskb %xmm0, %ecx
775 ; SSE-NEXT: xorl %eax, %eax
776 ; SSE-NEXT: negl %ecx
777 ; SSE-NEXT: sbbl %eax, %eax
778 ; SSE-NEXT: # kill: def $al killed $al killed $eax
781 ; AVX1-LABEL: test_v32i8_sext:
783 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
784 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
785 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
786 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
787 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
788 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
789 ; AVX1-NEXT: xorl %eax, %eax
790 ; AVX1-NEXT: negl %ecx
791 ; AVX1-NEXT: sbbl %eax, %eax
792 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
793 ; AVX1-NEXT: vzeroupper
796 ; AVX2-LABEL: test_v32i8_sext:
798 ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
799 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
800 ; AVX2-NEXT: xorl %eax, %eax
801 ; AVX2-NEXT: negl %ecx
802 ; AVX2-NEXT: sbbl %eax, %eax
803 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
804 ; AVX2-NEXT: vzeroupper
807 ; AVX512-LABEL: test_v32i8_sext:
809 ; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
810 ; AVX512-NEXT: vpmovmskb %ymm0, %ecx
811 ; AVX512-NEXT: xorl %eax, %eax
812 ; AVX512-NEXT: negl %ecx
813 ; AVX512-NEXT: sbbl %eax, %eax
814 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
815 ; AVX512-NEXT: vzeroupper
817 %c = icmp sgt <32 x i8> %a0, %a1
818 %s = sext <32 x i1> %c to <32 x i8>
819 %1 = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
820 %2 = or <32 x i8> %s, %1
821 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
822 %4 = or <32 x i8> %2, %3
823 %5 = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
824 %6 = or <32 x i8> %4, %5
825 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
826 %8 = or <32 x i8> %6, %7
827 %9 = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
828 %10 = or <32 x i8> %8, %9
829 %11 = extractelement <32 x i8> %10, i32 0
; Any-of reduction performed directly on the <2 x i1> result of fcmp ogt
; (no sext): the mask is OR-ed with a shuffle that moves lane 1 into lane 0,
; and lane 0 is extracted as an i1.
833 define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) {
834 ; SSE-LABEL: bool_reduction_v2f64:
836 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
837 ; SSE-NEXT: movmskpd %xmm1, %eax
838 ; SSE-NEXT: testl %eax, %eax
839 ; SSE-NEXT: setne %al
842 ; AVX1OR2-LABEL: bool_reduction_v2f64:
844 ; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
845 ; AVX1OR2-NEXT: vtestpd %xmm0, %xmm0
846 ; AVX1OR2-NEXT: setne %al
849 ; AVX512-LABEL: bool_reduction_v2f64:
851 ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0
852 ; AVX512-NEXT: kmovd %k0, %eax
853 ; AVX512-NEXT: testb %al, %al
854 ; AVX512-NEXT: setne %al
856 %a = fcmp ogt <2 x double> %x, %y
857 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
858 %c = or <2 x i1> %a, %b
859 %d = extractelement <2 x i1> %c, i32 0
; Any-of reduction performed directly on the <4 x i1> result of fcmp oeq
; (no sext): two shuffle+or steps fold the mask into lane 0, which is then
; extracted as an i1.
863 define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) {
864 ; SSE-LABEL: bool_reduction_v4f32:
866 ; SSE-NEXT: cmpeqps %xmm1, %xmm0
867 ; SSE-NEXT: movmskps %xmm0, %eax
868 ; SSE-NEXT: testl %eax, %eax
869 ; SSE-NEXT: setne %al
872 ; AVX1OR2-LABEL: bool_reduction_v4f32:
874 ; AVX1OR2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
875 ; AVX1OR2-NEXT: vtestps %xmm0, %xmm0
876 ; AVX1OR2-NEXT: setne %al
879 ; AVX512-LABEL: bool_reduction_v4f32:
881 ; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0
882 ; AVX512-NEXT: kmovd %k0, %eax
883 ; AVX512-NEXT: testb %al, %al
884 ; AVX512-NEXT: setne %al
886 %a = fcmp oeq <4 x float> %x, %y
887 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
888 %b = or <4 x i1> %s1, %a
889 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
890 %c = or <4 x i1> %s2, %b
891 %d = extractelement <4 x i1> %c, i32 0
; Any-lane-true reduction of a <4 x double> ordered greater-or-equal compare.
; The 256-bit operands arrive as two xmm halves on SSE and as one ymm on AVX.
; The <4 x i1> mask is folded in two shuffle+or steps and lane 0 is extracted.
; The autogenerated checks expect the two SSE compare halves to be narrowed
; with shufps before movmskps, vtestpd on AVX1/AVX2 and kmovd+testb on AVX512.
895 define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) {
896 ; SSE-LABEL: bool_reduction_v4f64:
898 ; SSE-NEXT: cmplepd %xmm1, %xmm3
899 ; SSE-NEXT: cmplepd %xmm0, %xmm2
900 ; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
901 ; SSE-NEXT: movmskps %xmm2, %eax
902 ; SSE-NEXT: testl %eax, %eax
903 ; SSE-NEXT: setne %al
906 ; AVX1OR2-LABEL: bool_reduction_v4f64:
908 ; AVX1OR2-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
909 ; AVX1OR2-NEXT: vtestpd %ymm0, %ymm0
910 ; AVX1OR2-NEXT: setne %al
911 ; AVX1OR2-NEXT: vzeroupper
914 ; AVX512-LABEL: bool_reduction_v4f64:
916 ; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k0
917 ; AVX512-NEXT: kmovd %k0, %eax
918 ; AVX512-NEXT: testb %al, %al
919 ; AVX512-NEXT: setne %al
920 ; AVX512-NEXT: vzeroupper
922 %a = fcmp oge <4 x double> %x, %y
; Step 1 brings lanes 2 and 3 down to lanes 0 and 1 before the or.
923 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
924 %b = or <4 x i1> %s1, %a
; Step 2 brings lane 1 down to lane 0, so lane 0 of %c is the or of all four lanes.
925 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
926 %c = or <4 x i1> %s2, %b
927 %d = extractelement <4 x i1> %c, i32 0
; Any-lane-true reduction of an <8 x float> unordered-or-not-equal compare.
; The <8 x i1> mask is folded in three shuffle+or steps (4, 2, then 1 lanes)
; and lane 0 is extracted.
; The autogenerated checks expect the two SSE compare halves to be packed with
; packssdw before pmovmskb, vtestps on AVX1/AVX2 and kmovd+testb on AVX512.
931 define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) {
932 ; SSE-LABEL: bool_reduction_v8f32:
934 ; SSE-NEXT: cmpneqps %xmm3, %xmm1
935 ; SSE-NEXT: cmpneqps %xmm2, %xmm0
936 ; SSE-NEXT: packssdw %xmm1, %xmm0
937 ; SSE-NEXT: pmovmskb %xmm0, %eax
938 ; SSE-NEXT: testl %eax, %eax
939 ; SSE-NEXT: setne %al
942 ; AVX1OR2-LABEL: bool_reduction_v8f32:
944 ; AVX1OR2-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0
945 ; AVX1OR2-NEXT: vtestps %ymm0, %ymm0
946 ; AVX1OR2-NEXT: setne %al
947 ; AVX1OR2-NEXT: vzeroupper
950 ; AVX512-LABEL: bool_reduction_v8f32:
952 ; AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k0
953 ; AVX512-NEXT: kmovd %k0, %eax
954 ; AVX512-NEXT: testb %al, %al
955 ; AVX512-NEXT: setne %al
956 ; AVX512-NEXT: vzeroupper
958 %a = fcmp une <8 x float> %x, %y
; Halving tree. Each step shuffles the upper part of the live lanes onto the
; lower part and ors, so lane 0 of %d is the or of all eight lanes.
959 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
960 %b = or <8 x i1> %s1, %a
961 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
962 %c = or <8 x i1> %s2, %b
963 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
964 %d = or <8 x i1> %s3, %c
965 %e = extractelement <8 x i1> %d, i32 0
; Any-lane-true reduction of a <2 x i64> not-equal compare.
; The <2 x i1> mask is folded by one shuffle+or step and lane 0 is extracted.
; The autogenerated checks expect SSE2 to compare for equality as 32-bit lanes
; and test the movmskps result against 15, while SSE4.2 and all AVX runs
; expect an xor of the inputs followed by ptest.
969 define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
970 ; SSE2-LABEL: bool_reduction_v2i64:
972 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
973 ; SSE2-NEXT: movmskps %xmm0, %eax
974 ; SSE2-NEXT: xorl $15, %eax
975 ; SSE2-NEXT: setne %al
978 ; SSE42-LABEL: bool_reduction_v2i64:
980 ; SSE42-NEXT: pxor %xmm1, %xmm0
981 ; SSE42-NEXT: ptest %xmm0, %xmm0
982 ; SSE42-NEXT: setne %al
985 ; AVX-LABEL: bool_reduction_v2i64:
987 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
988 ; AVX-NEXT: vptest %xmm0, %xmm0
989 ; AVX-NEXT: setne %al
991 %a = icmp ne <2 x i64> %x, %y
; Lane 1 is shuffled down to lane 0 and or-ed in, so lane 0 holds a[0] | a[1].
992 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
993 %c = or <2 x i1> %a, %b
994 %d = extractelement <2 x i1> %c, i32 0
; Any-lane-true reduction of a <4 x i32> unsigned greater-than compare.
; The <4 x i1> mask is folded in two shuffle+or steps and lane 0 is extracted.
; The autogenerated checks expect SSE2 to flip the sign bits (xor with
; 2147483648) so the signed pcmpgtd can be used, SSE4.2 and AVX1/AVX2 to use
; an unsigned min plus xor and ptest, and AVX512 to use vpcmpnleud on a mask
; register.
998 define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
999 ; SSE2-LABEL: bool_reduction_v4i32:
1001 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
1002 ; SSE2-NEXT: pxor %xmm2, %xmm1
1003 ; SSE2-NEXT: pxor %xmm2, %xmm0
1004 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
1005 ; SSE2-NEXT: movmskps %xmm0, %eax
1006 ; SSE2-NEXT: testl %eax, %eax
1007 ; SSE2-NEXT: setne %al
1010 ; SSE42-LABEL: bool_reduction_v4i32:
1012 ; SSE42-NEXT: pminud %xmm0, %xmm1
1013 ; SSE42-NEXT: pxor %xmm0, %xmm1
1014 ; SSE42-NEXT: ptest %xmm1, %xmm1
1015 ; SSE42-NEXT: setne %al
1018 ; AVX1OR2-LABEL: bool_reduction_v4i32:
1020 ; AVX1OR2-NEXT: vpminud %xmm1, %xmm0, %xmm1
1021 ; AVX1OR2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1022 ; AVX1OR2-NEXT: vptest %xmm0, %xmm0
1023 ; AVX1OR2-NEXT: setne %al
1024 ; AVX1OR2-NEXT: retq
1026 ; AVX512-LABEL: bool_reduction_v4i32:
1028 ; AVX512-NEXT: vpcmpnleud %xmm1, %xmm0, %k0
1029 ; AVX512-NEXT: kmovd %k0, %eax
1030 ; AVX512-NEXT: testb %al, %al
1031 ; AVX512-NEXT: setne %al
1033 %a = icmp ugt <4 x i32> %x, %y
; Step 1 brings lanes 2 and 3 down to lanes 0 and 1 before the or.
1034 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1035 %b = or <4 x i1> %s1, %a
; Step 2 brings lane 1 down to lane 0, so lane 0 of %c is the or of all four lanes.
1036 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1037 %c = or <4 x i1> %s2, %b
1038 %d = extractelement <4 x i1> %c, i32 0
; Any-lane-true reduction of an <8 x i16> signed less-than compare.
; The <8 x i1> mask is folded in three shuffle+or steps and lane 0 is extracted.
; The autogenerated checks expect the compare to be emitted as a greater-than
; with swapped operands, followed by pmovmskb+testl on SSE and AVX1/AVX2 and
; by kmovd+testb of the mask register on AVX512.
1042 define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) {
1043 ; SSE-LABEL: bool_reduction_v8i16:
1045 ; SSE-NEXT: pcmpgtw %xmm0, %xmm1
1046 ; SSE-NEXT: pmovmskb %xmm1, %eax
1047 ; SSE-NEXT: testl %eax, %eax
1048 ; SSE-NEXT: setne %al
1051 ; AVX1OR2-LABEL: bool_reduction_v8i16:
1053 ; AVX1OR2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1054 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
1055 ; AVX1OR2-NEXT: testl %eax, %eax
1056 ; AVX1OR2-NEXT: setne %al
1057 ; AVX1OR2-NEXT: retq
1059 ; AVX512-LABEL: bool_reduction_v8i16:
1061 ; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k0
1062 ; AVX512-NEXT: kmovd %k0, %eax
1063 ; AVX512-NEXT: testb %al, %al
1064 ; AVX512-NEXT: setne %al
1066 %a = icmp slt <8 x i16> %x, %y
; Halving tree. Each step shuffles the upper part of the live lanes onto the
; lower part and ors, so lane 0 of %d is the or of all eight lanes.
1067 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1068 %b = or <8 x i1> %s1, %a
1069 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1070 %c = or <8 x i1> %s2, %b
1071 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1072 %d = or <8 x i1> %s3, %c
1073 %e = extractelement <8 x i1> %d, i32 0
; Any-lane-true reduction of a <16 x i8> signed greater-than compare.
; The <16 x i1> mask is folded in four shuffle+or steps (8, 4, 2, then 1
; lanes) and lane 0 is extracted.
; The autogenerated checks expect pmovmskb+testl on SSE and AVX1/AVX2 and a
; kortestw of the mask register on AVX512.
1077 define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) {
1078 ; SSE-LABEL: bool_reduction_v16i8:
1080 ; SSE-NEXT: pcmpgtb %xmm1, %xmm0
1081 ; SSE-NEXT: pmovmskb %xmm0, %eax
1082 ; SSE-NEXT: testl %eax, %eax
1083 ; SSE-NEXT: setne %al
1086 ; AVX1OR2-LABEL: bool_reduction_v16i8:
1088 ; AVX1OR2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
1089 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
1090 ; AVX1OR2-NEXT: testl %eax, %eax
1091 ; AVX1OR2-NEXT: setne %al
1092 ; AVX1OR2-NEXT: retq
1094 ; AVX512-LABEL: bool_reduction_v16i8:
1096 ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
1097 ; AVX512-NEXT: kortestw %k0, %k0
1098 ; AVX512-NEXT: setne %al
1100 %a = icmp sgt <16 x i8> %x, %y
; Halving tree. Each step shuffles the upper part of the live lanes onto the
; lower part and ors, so lane 0 of %e is the or of all sixteen lanes.
1101 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1102 %b = or <16 x i1> %s1, %a
1103 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1104 %c = or <16 x i1> %s2, %b
1105 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1106 %d = or <16 x i1> %s3, %c
1107 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1108 %e = or <16 x i1> %s4, %d
1109 %f = extractelement <16 x i1> %e, i32 0
; Any-lane-true reduction of a <4 x i64> signed less-than compare.
; The <4 x i1> mask is folded in two shuffle+or steps and lane 0 is extracted.
; The autogenerated checks expect SSE2 to build the 64-bit compare out of
; 32-bit pcmpgtd/pcmpeqd pieces, SSE4.2 to use pcmpgtq on both halves, AVX1 to
; split the ymm inputs with vextractf128, AVX2 to use one ymm vpcmpgtq, and
; AVX512 to compare into a mask register.
1113 define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
1114 ; SSE2-LABEL: bool_reduction_v4i64:
1116 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
1117 ; SSE2-NEXT: pxor %xmm4, %xmm1
1118 ; SSE2-NEXT: pxor %xmm4, %xmm3
1119 ; SSE2-NEXT: movdqa %xmm3, %xmm5
1120 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm5
1121 ; SSE2-NEXT: pxor %xmm4, %xmm0
1122 ; SSE2-NEXT: pxor %xmm4, %xmm2
1123 ; SSE2-NEXT: movdqa %xmm2, %xmm4
1124 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
1125 ; SSE2-NEXT: movdqa %xmm4, %xmm6
1126 ; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm5[0,2]
1127 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
1128 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
1129 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
1130 ; SSE2-NEXT: andps %xmm6, %xmm2
1131 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3]
1132 ; SSE2-NEXT: orps %xmm2, %xmm4
1133 ; SSE2-NEXT: movmskps %xmm4, %eax
1134 ; SSE2-NEXT: testl %eax, %eax
1135 ; SSE2-NEXT: setne %al
1138 ; SSE42-LABEL: bool_reduction_v4i64:
1140 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm3
1141 ; SSE42-NEXT: pcmpgtq %xmm0, %xmm2
1142 ; SSE42-NEXT: packssdw %xmm3, %xmm2
1143 ; SSE42-NEXT: movmskps %xmm2, %eax
1144 ; SSE42-NEXT: testl %eax, %eax
1145 ; SSE42-NEXT: setne %al
1148 ; AVX1-LABEL: bool_reduction_v4i64:
1150 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1151 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1152 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1153 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
1154 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1155 ; AVX1-NEXT: vtestpd %xmm0, %xmm0
1156 ; AVX1-NEXT: setne %al
1157 ; AVX1-NEXT: vzeroupper
1160 ; AVX2-LABEL: bool_reduction_v4i64:
1162 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
1163 ; AVX2-NEXT: vtestpd %ymm0, %ymm0
1164 ; AVX2-NEXT: setne %al
1165 ; AVX2-NEXT: vzeroupper
1168 ; AVX512-LABEL: bool_reduction_v4i64:
1170 ; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0
1171 ; AVX512-NEXT: kmovd %k0, %eax
1172 ; AVX512-NEXT: testb %al, %al
1173 ; AVX512-NEXT: setne %al
1174 ; AVX512-NEXT: vzeroupper
1176 %a = icmp slt <4 x i64> %x, %y
; Step 1 brings lanes 2 and 3 down to lanes 0 and 1 before the or.
1177 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1178 %b = or <4 x i1> %s1, %a
; Step 2 brings lane 1 down to lane 0, so lane 0 of %c is the or of all four lanes.
1179 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1180 %c = or <4 x i1> %s2, %b
1181 %d = extractelement <4 x i1> %c, i32 0
; Any-lane-true reduction of an <8 x i32> unsigned less-or-equal compare.
; The <8 x i1> mask is folded in three shuffle+or steps and lane 0 is extracted.
; The autogenerated checks expect SSE2 to compute the inverse (sign-flipped
; pcmpgtd) and then test the packed byte mask against -1, SSE4.2/AVX1/AVX2 to
; use an unsigned min followed by an equality compare, and AVX512 to use
; vpcmpleud on a mask register.
1185 define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
1186 ; SSE2-LABEL: bool_reduction_v8i32:
1188 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
1189 ; SSE2-NEXT: pxor %xmm4, %xmm3
1190 ; SSE2-NEXT: pxor %xmm4, %xmm1
1191 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm1
1192 ; SSE2-NEXT: pxor %xmm4, %xmm2
1193 ; SSE2-NEXT: pxor %xmm4, %xmm0
1194 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
1195 ; SSE2-NEXT: packssdw %xmm1, %xmm0
1196 ; SSE2-NEXT: packsswb %xmm0, %xmm0
1197 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1198 ; SSE2-NEXT: xorb $-1, %al
1199 ; SSE2-NEXT: setne %al
1202 ; SSE42-LABEL: bool_reduction_v8i32:
1204 ; SSE42-NEXT: pminud %xmm1, %xmm3
1205 ; SSE42-NEXT: pcmpeqd %xmm1, %xmm3
1206 ; SSE42-NEXT: pminud %xmm0, %xmm2
1207 ; SSE42-NEXT: pcmpeqd %xmm0, %xmm2
1208 ; SSE42-NEXT: packssdw %xmm3, %xmm2
1209 ; SSE42-NEXT: pmovmskb %xmm2, %eax
1210 ; SSE42-NEXT: testl %eax, %eax
1211 ; SSE42-NEXT: setne %al
1214 ; AVX1-LABEL: bool_reduction_v8i32:
1216 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1217 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1218 ; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
1219 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2
1220 ; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm1
1221 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1222 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1223 ; AVX1-NEXT: vtestps %xmm0, %xmm0
1224 ; AVX1-NEXT: setne %al
1225 ; AVX1-NEXT: vzeroupper
1228 ; AVX2-LABEL: bool_reduction_v8i32:
1230 ; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1
1231 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
1232 ; AVX2-NEXT: vtestps %ymm0, %ymm0
1233 ; AVX2-NEXT: setne %al
1234 ; AVX2-NEXT: vzeroupper
1237 ; AVX512-LABEL: bool_reduction_v8i32:
1239 ; AVX512-NEXT: vpcmpleud %ymm1, %ymm0, %k0
1240 ; AVX512-NEXT: kmovd %k0, %eax
1241 ; AVX512-NEXT: testb %al, %al
1242 ; AVX512-NEXT: setne %al
1243 ; AVX512-NEXT: vzeroupper
1245 %a = icmp ule <8 x i32> %x, %y
; Halving tree. Each step shuffles the upper part of the live lanes onto the
; lower part and ors, so lane 0 of %d is the or of all eight lanes.
1246 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1247 %b = or <8 x i1> %s1, %a
1248 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1249 %c = or <8 x i1> %s2, %b
1250 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1251 %d = or <8 x i1> %s3, %c
1252 %e = extractelement <8 x i1> %d, i32 0
; Any-lane-true reduction of a <16 x i16> equality compare.
; The <16 x i1> mask is folded in four shuffle+or steps and lane 0 is extracted.
; The autogenerated checks expect SSE and AVX1 to compare the two 128-bit
; halves separately and pack them with packsswb before pmovmskb, AVX2 to use a
; single ymm compare with vpmovmskb, and AVX512 to use kortestw on the mask.
1256 define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
1257 ; SSE-LABEL: bool_reduction_v16i16:
1259 ; SSE-NEXT: pcmpeqw %xmm3, %xmm1
1260 ; SSE-NEXT: pcmpeqw %xmm2, %xmm0
1261 ; SSE-NEXT: packsswb %xmm1, %xmm0
1262 ; SSE-NEXT: pmovmskb %xmm0, %eax
1263 ; SSE-NEXT: testl %eax, %eax
1264 ; SSE-NEXT: setne %al
1267 ; AVX1-LABEL: bool_reduction_v16i16:
1269 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1270 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1271 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2
1272 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
1273 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
1274 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1275 ; AVX1-NEXT: testl %eax, %eax
1276 ; AVX1-NEXT: setne %al
1277 ; AVX1-NEXT: vzeroupper
1280 ; AVX2-LABEL: bool_reduction_v16i16:
1282 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1283 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1284 ; AVX2-NEXT: testl %eax, %eax
1285 ; AVX2-NEXT: setne %al
1286 ; AVX2-NEXT: vzeroupper
1289 ; AVX512-LABEL: bool_reduction_v16i16:
1291 ; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
1292 ; AVX512-NEXT: kortestw %k0, %k0
1293 ; AVX512-NEXT: setne %al
1294 ; AVX512-NEXT: vzeroupper
1296 %a = icmp eq <16 x i16> %x, %y
; Halving tree. Each step shuffles the upper part of the live lanes onto the
; lower part and ors, so lane 0 of %e is the or of all sixteen lanes.
1297 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1298 %b = or <16 x i1> %s1, %a
1299 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1300 %c = or <16 x i1> %s2, %b
1301 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1302 %d = or <16 x i1> %s3, %c
1303 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1304 %e = or <16 x i1> %s4, %d
1305 %f = extractelement <16 x i1> %e, i32 0
1309 define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
1310 ; SSE-LABEL: bool_reduction_v32i8:
1312 ; SSE-NEXT: pcmpeqb %xmm3, %xmm1
1313 ; SSE-NEXT: pcmpeqb %xmm2, %xmm0
1314 ; SSE-NEXT: por %xmm1, %xmm0
1315 ; SSE-NEXT: pmovmskb %xmm0, %eax
1316 ; SSE-NEXT: testl %eax, %eax
1317 ; SSE-NEXT: setne %al
1320 ; AVX1-LABEL: bool_reduction_v32i8:
1322 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1323 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1324 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2
1325 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1326 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1327 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1328 ; AVX1-NEXT: testl %eax, %eax
1329 ; AVX1-NEXT: setne %al
1330 ; AVX1-NEXT: vzeroupper
1333 ; AVX2-LABEL: bool_reduction_v32i8:
1335 ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
1336 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1337 ; AVX2-NEXT: testl %eax, %eax
1338 ; AVX2-NEXT: setne %al
1339 ; AVX2-NEXT: vzeroupper
1342 ; AVX512-LABEL: bool_reduction_v32i8:
1344 ; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
1345 ; AVX512-NEXT: kortestd %k0, %k0
1346 ; AVX512-NEXT: setne %al
1347 ; AVX512-NEXT: vzeroupper
1349 %a = icmp eq <32 x i8> %x, %y
1350 %s1 = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1351 %b = or <32 x i1> %s1, %a
1352 %s2 = shufflevector <32 x i1> %b, <32 x i1> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1353 %c = or <32 x i1> %s2, %b
1354 %s3 = shufflevector <32 x i1> %c, <32 x i1> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1355 %d = or <32 x i1> %s3, %c
1356 %s4 = shufflevector <32 x i1> %d, <32 x i1> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1357 %e = or <32 x i1> %s4, %d
1358 %s5 = shufflevector <32 x i1> %e, <32 x i1> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1359 %f = or <32 x i1> %s5, %e
1360 %g = extractelement <32 x i1> %f, i32 0
; Two <16 x i8> equality compares against %y are concatenated into one
; <32 x i1> mask, which is bitcast to an i32. That i32 is used twice, once as
; field 0 of the {i32, i1} aggregate and once as the operand of the
; compare-with-zero that fills field 1.
; The autogenerated checks expect SSE and AVX1 to build the i32 from two
; pmovmskb results (shll by 16 then orl), AVX2 to merge both compares with
; vinserti128 and use one ymm vpmovmskb, and AVX512 to join the two mask
; registers with kunpckwd.
; NOTE(review) "muti" in the function name is presumably a typo for "multi".
; It is left alone because every LABEL check line references it.
1364 define {i32, i1} @test_v16i8_muti_uses(<16 x i8> %x, <16 x i8>%y, <16 x i8> %z) {
1365 ; SSE-LABEL: test_v16i8_muti_uses:
1367 ; SSE-NEXT: pcmpeqb %xmm1, %xmm0
1368 ; SSE-NEXT: pcmpeqb %xmm1, %xmm2
1369 ; SSE-NEXT: pmovmskb %xmm0, %ecx
1370 ; SSE-NEXT: pmovmskb %xmm2, %eax
1371 ; SSE-NEXT: shll $16, %eax
1372 ; SSE-NEXT: orl %ecx, %eax
1373 ; SSE-NEXT: sete %dl
1376 ; AVX1-LABEL: test_v16i8_muti_uses:
1378 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1379 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1
1380 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
1381 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
1382 ; AVX1-NEXT: shll $16, %eax
1383 ; AVX1-NEXT: orl %ecx, %eax
1384 ; AVX1-NEXT: sete %dl
1387 ; AVX2-LABEL: test_v16i8_muti_uses:
1389 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1390 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1
1391 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1392 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1393 ; AVX2-NEXT: testl %eax, %eax
1394 ; AVX2-NEXT: sete %dl
1395 ; AVX2-NEXT: vzeroupper
1398 ; AVX512-LABEL: test_v16i8_muti_uses:
1400 ; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
1401 ; AVX512-NEXT: vpcmpeqb %xmm1, %xmm2, %k1
1402 ; AVX512-NEXT: kunpckwd %k0, %k1, %k0
1403 ; AVX512-NEXT: kortestd %k0, %k0
1404 ; AVX512-NEXT: kmovd %k0, %eax
1405 ; AVX512-NEXT: sete %dl
1407 %t1 = icmp eq <16 x i8> %x, %y
1408 %t2 = icmp eq <16 x i8> %z, %y
; Identity-order two-input shuffle, so %t1 fills lanes 0-15 and %t2 lanes 16-31.
1409 %a = shufflevector <16 x i1> %t1, <16 x i1> %t2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
1410 %b = bitcast <32 x i1> %a to i32
1411 %c = icmp eq i32 %b, 0
; %b has two users (the icmp above and the insertvalue below).
1412 %r1 = insertvalue {i32, i1} poison, i32 %b, 0
1413 %r2 = insertvalue {i32, i1} %r1, i1 %c, 1
; Loads two byte-aligned <2 x i8> vectors, compares them for equality and
; combines the two result lanes with a select whose true arm is the constant
; true, which computes cmp0 or cmp1 in select form.
; The autogenerated checks expect each 2-byte vector to be loaded with a
; single movzwl, the compare result to be sign-extended to 64-bit lanes
; (unpack+shuffle on SSE2, pmovsxbq on SSE4.2 and AVX1/AVX2) before the
; movmskpd or vtestpd, and AVX512 to test only the low two mask bits (testb $3).
1418 define i1 @select_v2i8(ptr %s0, ptr %s1) {
1419 ; SSE2-LABEL: select_v2i8:
1421 ; SSE2-NEXT: movzwl (%rdi), %eax
1422 ; SSE2-NEXT: movd %eax, %xmm0
1423 ; SSE2-NEXT: movzwl (%rsi), %eax
1424 ; SSE2-NEXT: movd %eax, %xmm1
1425 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
1426 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1427 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
1428 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1429 ; SSE2-NEXT: movmskpd %xmm0, %eax
1430 ; SSE2-NEXT: testl %eax, %eax
1431 ; SSE2-NEXT: setne %al
1434 ; SSE42-LABEL: select_v2i8:
1436 ; SSE42-NEXT: movzwl (%rdi), %eax
1437 ; SSE42-NEXT: movd %eax, %xmm0
1438 ; SSE42-NEXT: movzwl (%rsi), %eax
1439 ; SSE42-NEXT: movd %eax, %xmm1
1440 ; SSE42-NEXT: pcmpeqb %xmm0, %xmm1
1441 ; SSE42-NEXT: pmovsxbq %xmm1, %xmm0
1442 ; SSE42-NEXT: movmskpd %xmm0, %eax
1443 ; SSE42-NEXT: testl %eax, %eax
1444 ; SSE42-NEXT: setne %al
1447 ; AVX1OR2-LABEL: select_v2i8:
1449 ; AVX1OR2-NEXT: movzwl (%rdi), %eax
1450 ; AVX1OR2-NEXT: vmovd %eax, %xmm0
1451 ; AVX1OR2-NEXT: movzwl (%rsi), %eax
1452 ; AVX1OR2-NEXT: vmovd %eax, %xmm1
1453 ; AVX1OR2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1454 ; AVX1OR2-NEXT: vpmovsxbq %xmm0, %xmm0
1455 ; AVX1OR2-NEXT: vtestpd %xmm0, %xmm0
1456 ; AVX1OR2-NEXT: setne %al
1457 ; AVX1OR2-NEXT: retq
1459 ; AVX512-LABEL: select_v2i8:
1461 ; AVX512-NEXT: movzwl (%rdi), %eax
1462 ; AVX512-NEXT: vmovd %eax, %xmm0
1463 ; AVX512-NEXT: movzwl (%rsi), %eax
1464 ; AVX512-NEXT: vmovd %eax, %xmm1
1465 ; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
1466 ; AVX512-NEXT: kmovd %k0, %eax
1467 ; AVX512-NEXT: testb $3, %al
1468 ; AVX512-NEXT: setne %al
1470 %v0 = load <2 x i8>, ptr %s0, align 1
1471 %v1 = load <2 x i8>, ptr %s1, align 1
1472 %cmp = icmp eq <2 x i8> %v0, %v1
; Both lanes are extracted as scalars and combined as cmp0 ? true : cmp1.
1473 %cmp0 = extractelement <2 x i1> %cmp, i32 0
1474 %cmp1 = extractelement <2 x i1> %cmp, i32 1
1475 %res = select i1 %cmp0, i1 true, i1 %cmp1