1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512
; Any-of reduction over a <2 x double> compare.
;   %c : lane-wise ordered greater-than of %a0 and %a1.
;   %s : each i1 lane sign-extended to i64, so a lane is 0 or -1.
;   %1/%2 : lane 1 is shuffled down to lane 0 and OR'ed in.
;   %3 : lane 0 of the result, i.e. -1 if either lane compared true, else 0.
; The SSE and AVX expectations below go through movmskpd on the compare
; result; the AVX512 expectations keep the shuffle+or in vector registers.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
7 define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
8 ; SSE-LABEL: test_v2f64_sext:
10 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
11 ; SSE-NEXT: movmskpd %xmm1, %eax
13 ; SSE-NEXT: sbbq %rax, %rax
16 ; AVX-LABEL: test_v2f64_sext:
18 ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
19 ; AVX-NEXT: vmovmskpd %xmm0, %eax
21 ; AVX-NEXT: sbbq %rax, %rax
24 ; AVX512-LABEL: test_v2f64_sext:
26 ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
27 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
28 ; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0
29 ; AVX512-NEXT: vmovq %xmm0, %rax
31 %c = fcmp ogt <2 x double> %a0, %a1
32 %s = sext <2 x i1> %c to <2 x i64>
33 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
34 %2 = or <2 x i64> %s, %1
35 %3 = extractelement <2 x i64> %2, i32 0
; Any-of reduction over a <4 x double> compare, widened to <4 x i64>.
;   %c/%s : ordered greater-than, then sign-extension to 0 / -1 per i64 lane.
;   %1/%2 : upper half (lanes 2,3) OR'ed into the lower half.
;   %3/%4 : lane 1 OR'ed into lane 0.
;   %5 : lane 0 of the result, i.e. the OR of all four lanes.
; Only lane 0 of the final vector is read; the remaining shuffle mask lanes
; are undef.
; On SSE the 256-bit compare is expected as two 128-bit cmpltpd ops combined
; with orpd before the movmskpd.
39 define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
40 ; SSE-LABEL: test_v4f64_sext:
42 ; SSE-NEXT: cmpltpd %xmm1, %xmm3
43 ; SSE-NEXT: cmpltpd %xmm0, %xmm2
44 ; SSE-NEXT: orpd %xmm3, %xmm2
45 ; SSE-NEXT: movmskpd %xmm2, %eax
47 ; SSE-NEXT: sbbq %rax, %rax
50 ; AVX-LABEL: test_v4f64_sext:
52 ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
53 ; AVX-NEXT: vmovmskpd %ymm0, %eax
55 ; AVX-NEXT: sbbq %rax, %rax
56 ; AVX-NEXT: vzeroupper
59 ; AVX512-LABEL: test_v4f64_sext:
61 ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
62 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
63 ; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0
64 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
65 ; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0
66 ; AVX512-NEXT: vmovq %xmm0, %rax
67 ; AVX512-NEXT: vzeroupper
69 %c = fcmp ogt <4 x double> %a0, %a1
70 %s = sext <4 x i1> %c to <4 x i64>
71 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
72 %2 = or <4 x i64> %s, %1
73 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
74 %4 = or <4 x i64> %2, %3
75 %5 = extractelement <4 x i64> %4, i64 0
; Variant of the <4 x double> any-of reduction in which the compare mask is
; sign-extended to <4 x i32> (a 128-bit vector) rather than to <4 x i64>.
;   %c/%s : ordered greater-than, then sign-extension to 0 / -1 per i32 lane.
;   %1..%4 : two shuffle+or steps fold lanes 2,3 and then lane 1 into lane 0.
;   %5 : lane 0 of the result, i.e. the OR of all four i32 lanes.
;   %6 : that scalar sign-extended from i32 to i64.
; The SSE and AVX expectations pack the two compare halves with packssdw and
; read the mask with movmskps; the AVX512 expectations compare into mask
; register %k1 and materialise the i32 lanes with a zero-masked move.
79 define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
80 ; SSE-LABEL: test_v4f64_legal_sext:
82 ; SSE-NEXT: cmpltpd %xmm1, %xmm3
83 ; SSE-NEXT: cmpltpd %xmm0, %xmm2
84 ; SSE-NEXT: packssdw %xmm3, %xmm2
85 ; SSE-NEXT: movmskps %xmm2, %eax
87 ; SSE-NEXT: sbbq %rax, %rax
90 ; AVX-LABEL: test_v4f64_legal_sext:
92 ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
93 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
94 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
95 ; AVX-NEXT: vmovmskps %xmm0, %eax
97 ; AVX-NEXT: sbbq %rax, %rax
98 ; AVX-NEXT: vzeroupper
101 ; AVX512-LABEL: test_v4f64_legal_sext:
103 ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
104 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
105 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
106 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
107 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
108 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
109 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
110 ; AVX512-NEXT: vmovd %xmm0, %eax
112 ; AVX512-NEXT: vzeroupper
114 %c = fcmp ogt <4 x double> %a0, %a1
115 %s = sext <4 x i1> %c to <4 x i32>
116 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
117 %2 = or <4 x i32> %s, %1
118 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
119 %4 = or <4 x i32> %2, %3
120 %5 = extractelement <4 x i32> %4, i64 0
121 %6 = sext i32 %5 to i64
; Any-of reduction over a <4 x float> compare.
;   %c/%s : ordered greater-than, then sign-extension to 0 / -1 per i32 lane.
;   %1/%2 : upper half (lanes 2,3) OR'ed into the lower half.
;   %3/%4 : lane 1 OR'ed into lane 0.
;   %5 : lane 0 of the result, i.e. -1 if any lane compared true, else 0.
; The SSE and AVX expectations use movmskps followed by negl/sbbl to build
; the 0 / -1 scalar; the AVX512 expectations keep the shuffle+or sequence.
125 define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
126 ; SSE-LABEL: test_v4f32_sext:
128 ; SSE-NEXT: cmpltps %xmm0, %xmm1
129 ; SSE-NEXT: movmskps %xmm1, %eax
130 ; SSE-NEXT: negl %eax
131 ; SSE-NEXT: sbbl %eax, %eax
134 ; AVX-LABEL: test_v4f32_sext:
136 ; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
137 ; AVX-NEXT: vmovmskps %xmm0, %eax
138 ; AVX-NEXT: negl %eax
139 ; AVX-NEXT: sbbl %eax, %eax
142 ; AVX512-LABEL: test_v4f32_sext:
144 ; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
145 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
146 ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
147 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
148 ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
149 ; AVX512-NEXT: vmovd %xmm0, %eax
151 %c = fcmp ogt <4 x float> %a0, %a1
152 %s = sext <4 x i1> %c to <4 x i32>
153 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
154 %2 = or <4 x i32> %s, %1
155 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
156 %4 = or <4 x i32> %2, %3
157 %5 = extractelement <4 x i32> %4, i32 0
; Any-of reduction over an <8 x float> compare, widened to <8 x i32>.
;   %c/%s : ordered greater-than, then sign-extension to 0 / -1 per i32 lane.
;   %1/%2 : lanes 4..7 OR'ed into lanes 0..3.
;   %3/%4 : lanes 2,3 OR'ed into lanes 0,1.
;   %5/%6 : lane 1 OR'ed into lane 0.
;   %7 : lane 0 of the result, i.e. the OR of all eight lanes.
; On SSE the 256-bit compare is expected as two cmpltps ops merged with orps
; before a single movmskps.
161 define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
162 ; SSE-LABEL: test_v8f32_sext:
164 ; SSE-NEXT: cmpltps %xmm1, %xmm3
165 ; SSE-NEXT: cmpltps %xmm0, %xmm2
166 ; SSE-NEXT: orps %xmm3, %xmm2
167 ; SSE-NEXT: movmskps %xmm2, %eax
168 ; SSE-NEXT: negl %eax
169 ; SSE-NEXT: sbbl %eax, %eax
172 ; AVX-LABEL: test_v8f32_sext:
174 ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
175 ; AVX-NEXT: vmovmskps %ymm0, %eax
176 ; AVX-NEXT: negl %eax
177 ; AVX-NEXT: sbbl %eax, %eax
178 ; AVX-NEXT: vzeroupper
181 ; AVX512-LABEL: test_v8f32_sext:
183 ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
184 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
185 ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
186 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
187 ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
188 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
189 ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
190 ; AVX512-NEXT: vmovd %xmm0, %eax
191 ; AVX512-NEXT: vzeroupper
193 %c = fcmp ogt <8 x float> %a0, %a1
194 %s = sext <8 x i1> %c to <8 x i32>
195 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
196 %2 = or <8 x i32> %s, %1
197 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
198 %4 = or <8 x i32> %2, %3
199 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
200 %6 = or <8 x i32> %4, %5
201 %7 = extractelement <8 x i32> %6, i32 0
; Variant of the <8 x float> any-of reduction in which the compare mask is
; sign-extended to <8 x i16> (a 128-bit vector) rather than to <8 x i32>.
;   %c/%s : ordered greater-than, then sign-extension to 0 / -1 per i16 lane.
;   %1..%6 : three shuffle+or steps fold lanes 4..7, then 2,3, then 1 into
;            lane 0.
;   %7 : lane 0 of the result, i.e. the OR of all eight i16 lanes.
;   %8 : that scalar sign-extended from i16 to i32.
; The SSE and AVX expectations pack the compare halves with packssdw and read
; the mask with pmovmskb; the AVX512 expectations compare into mask register
; %k0 and expand it with vpmovm2w.
205 define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
206 ; SSE-LABEL: test_v8f32_legal_sext:
208 ; SSE-NEXT: cmpltps %xmm1, %xmm3
209 ; SSE-NEXT: cmpltps %xmm0, %xmm2
210 ; SSE-NEXT: packssdw %xmm3, %xmm2
211 ; SSE-NEXT: pmovmskb %xmm2, %eax
212 ; SSE-NEXT: negl %eax
213 ; SSE-NEXT: sbbl %eax, %eax
216 ; AVX-LABEL: test_v8f32_legal_sext:
218 ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
219 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
220 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
221 ; AVX-NEXT: vpmovmskb %xmm0, %eax
222 ; AVX-NEXT: negl %eax
223 ; AVX-NEXT: sbbl %eax, %eax
224 ; AVX-NEXT: vzeroupper
227 ; AVX512-LABEL: test_v8f32_legal_sext:
229 ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0
230 ; AVX512-NEXT: vpmovm2w %k0, %xmm0
231 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
232 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
233 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
234 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
235 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
236 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
237 ; AVX512-NEXT: vmovd %xmm0, %eax
239 ; AVX512-NEXT: vzeroupper
241 %c = fcmp ogt <8 x float> %a0, %a1
242 %s = sext <8 x i1> %c to <8 x i16>
243 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
244 %2 = or <8 x i16> %s, %1
245 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
246 %4 = or <8 x i16> %2, %3
247 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
248 %6 = or <8 x i16> %4, %5
249 %7 = extractelement <8 x i16> %6, i32 0
250 %8 = sext i16 %7 to i32
; Any-of reduction over a <2 x i64> integer compare.
;   %c : lane-wise signed greater-than of %a0 and %a1.
;   %s : each i1 lane sign-extended to i64, so a lane is 0 or -1.
;   %1/%2 : lane 1 is shuffled down to lane 0 and OR'ed in.
;   %3 : lane 0 of the result, i.e. -1 if either lane compared true, else 0.
; The SSE and AVX expectations use pcmpgtq + movmskpd + negl/sbbq; the AVX512
; expectations keep the shuffle+or in vector registers.
254 define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
255 ; SSE-LABEL: test_v2i64_sext:
257 ; SSE-NEXT: pcmpgtq %xmm1, %xmm0
258 ; SSE-NEXT: movmskpd %xmm0, %eax
259 ; SSE-NEXT: negl %eax
260 ; SSE-NEXT: sbbq %rax, %rax
263 ; AVX-LABEL: test_v2i64_sext:
265 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
266 ; AVX-NEXT: vmovmskpd %xmm0, %eax
267 ; AVX-NEXT: negl %eax
268 ; AVX-NEXT: sbbq %rax, %rax
271 ; AVX512-LABEL: test_v2i64_sext:
273 ; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
274 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
275 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
276 ; AVX512-NEXT: vmovq %xmm0, %rax
278 %c = icmp sgt <2 x i64> %a0, %a1
279 %s = sext <2 x i1> %c to <2 x i64>
280 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
281 %2 = or <2 x i64> %s, %1
282 %3 = extractelement <2 x i64> %2, i32 0
; Any-of reduction over a <4 x i64> integer compare.
;   %c/%s : signed greater-than, then sign-extension to 0 / -1 per i64 lane.
;   %1/%2 : upper half (lanes 2,3) OR'ed into the lower half.
;   %3/%4 : lane 1 OR'ed into lane 0.
;   %5 : lane 0 of the result, i.e. the OR of all four lanes.
; AVX1 and AVX2 have separate expectations here: the AVX1 lines split the
; compare into two 128-bit vpcmpgtq ops and re-join them with vinsertf128,
; while the AVX2 lines use a single 256-bit vpcmpgtq.
286 define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
287 ; SSE-LABEL: test_v4i64_sext:
289 ; SSE-NEXT: pcmpgtq %xmm3, %xmm1
290 ; SSE-NEXT: pcmpgtq %xmm2, %xmm0
291 ; SSE-NEXT: por %xmm1, %xmm0
292 ; SSE-NEXT: movmskpd %xmm0, %eax
293 ; SSE-NEXT: negl %eax
294 ; SSE-NEXT: sbbq %rax, %rax
297 ; AVX1-LABEL: test_v4i64_sext:
299 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
300 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
301 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
302 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
303 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
304 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
305 ; AVX1-NEXT: negl %eax
306 ; AVX1-NEXT: sbbq %rax, %rax
307 ; AVX1-NEXT: vzeroupper
310 ; AVX2-LABEL: test_v4i64_sext:
312 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
313 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
314 ; AVX2-NEXT: negl %eax
315 ; AVX2-NEXT: sbbq %rax, %rax
316 ; AVX2-NEXT: vzeroupper
319 ; AVX512-LABEL: test_v4i64_sext:
321 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
322 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
323 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
324 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
325 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
326 ; AVX512-NEXT: vmovq %xmm0, %rax
327 ; AVX512-NEXT: vzeroupper
329 %c = icmp sgt <4 x i64> %a0, %a1
330 %s = sext <4 x i1> %c to <4 x i64>
331 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
332 %2 = or <4 x i64> %s, %1
333 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
334 %4 = or <4 x i64> %2, %3
335 %5 = extractelement <4 x i64> %4, i64 0
; Variant of the <4 x i64> any-of reduction in which the compare mask is
; sign-extended to <4 x i32> (a 128-bit vector) rather than to <4 x i64>.
;   %c/%s : signed greater-than, then sign-extension to 0 / -1 per i32 lane.
;   %1..%4 : two shuffle+or steps fold lanes 2,3 and then lane 1 into lane 0.
;   %5 : lane 0 of the result, i.e. the OR of all four i32 lanes.
;   %6 : that scalar sign-extended from i32 to i64.
; The SSE, AVX1 and AVX2 expectations narrow the compare result with
; packssdw and read it with movmskps; the AVX512 expectations compare into
; mask register %k1 and materialise the i32 lanes with a zero-masked move.
339 define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
340 ; SSE-LABEL: test_v4i64_legal_sext:
342 ; SSE-NEXT: pcmpgtq %xmm3, %xmm1
343 ; SSE-NEXT: pcmpgtq %xmm2, %xmm0
344 ; SSE-NEXT: packssdw %xmm1, %xmm0
345 ; SSE-NEXT: movmskps %xmm0, %eax
346 ; SSE-NEXT: negl %eax
347 ; SSE-NEXT: sbbq %rax, %rax
350 ; AVX1-LABEL: test_v4i64_legal_sext:
352 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
353 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
354 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
355 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
356 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
357 ; AVX1-NEXT: vmovmskps %xmm0, %eax
358 ; AVX1-NEXT: negl %eax
359 ; AVX1-NEXT: sbbq %rax, %rax
360 ; AVX1-NEXT: vzeroupper
363 ; AVX2-LABEL: test_v4i64_legal_sext:
365 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
366 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
367 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
368 ; AVX2-NEXT: vmovmskps %xmm0, %eax
369 ; AVX2-NEXT: negl %eax
370 ; AVX2-NEXT: sbbq %rax, %rax
371 ; AVX2-NEXT: vzeroupper
374 ; AVX512-LABEL: test_v4i64_legal_sext:
376 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
377 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
378 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
379 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
380 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
381 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
382 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
383 ; AVX512-NEXT: vmovd %xmm0, %eax
385 ; AVX512-NEXT: vzeroupper
387 %c = icmp sgt <4 x i64> %a0, %a1
388 %s = sext <4 x i1> %c to <4 x i32>
389 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
390 %2 = or <4 x i32> %s, %1
391 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
392 %4 = or <4 x i32> %2, %3
393 %5 = extractelement <4 x i32> %4, i64 0
394 %6 = sext i32 %5 to i64
; Any-of reduction over a <4 x i32> integer compare.
;   %c/%s : signed greater-than, then sign-extension to 0 / -1 per i32 lane.
;   %1/%2 : upper half (lanes 2,3) OR'ed into the lower half.
;   %3/%4 : lane 1 OR'ed into lane 0.
;   %5 : lane 0 of the result, i.e. -1 if any lane compared true, else 0.
; The SSE and AVX expectations use pcmpgtd + movmskps + negl/sbbl; the AVX512
; expectations keep the shuffle+or sequence in vector registers.
398 define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
399 ; SSE-LABEL: test_v4i32_sext:
401 ; SSE-NEXT: pcmpgtd %xmm1, %xmm0
402 ; SSE-NEXT: movmskps %xmm0, %eax
403 ; SSE-NEXT: negl %eax
404 ; SSE-NEXT: sbbl %eax, %eax
407 ; AVX-LABEL: test_v4i32_sext:
409 ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
410 ; AVX-NEXT: vmovmskps %xmm0, %eax
411 ; AVX-NEXT: negl %eax
412 ; AVX-NEXT: sbbl %eax, %eax
415 ; AVX512-LABEL: test_v4i32_sext:
417 ; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
418 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
419 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
420 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
421 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
422 ; AVX512-NEXT: vmovd %xmm0, %eax
424 %c = icmp sgt <4 x i32> %a0, %a1
425 %s = sext <4 x i1> %c to <4 x i32>
426 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
427 %2 = or <4 x i32> %s, %1
428 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
429 %4 = or <4 x i32> %2, %3
430 %5 = extractelement <4 x i32> %4, i32 0
; Any-of reduction over an <8 x i32> integer compare.
;   %c/%s : signed greater-than, then sign-extension to 0 / -1 per i32 lane.
;   %1/%2 : lanes 4..7 OR'ed into lanes 0..3.
;   %3/%4 : lanes 2,3 OR'ed into lanes 0,1.
;   %5/%6 : lane 1 OR'ed into lane 0.
;   %7 : lane 0 of the result, i.e. the OR of all eight lanes.
; AVX1 and AVX2 have separate expectations: the AVX1 lines split the compare
; into two 128-bit vpcmpgtd ops and re-join them with vinsertf128, while the
; AVX2 lines use a single 256-bit vpcmpgtd.
434 define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
435 ; SSE-LABEL: test_v8i32_sext:
437 ; SSE-NEXT: pcmpgtd %xmm3, %xmm1
438 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0
439 ; SSE-NEXT: por %xmm1, %xmm0
440 ; SSE-NEXT: movmskps %xmm0, %eax
441 ; SSE-NEXT: negl %eax
442 ; SSE-NEXT: sbbl %eax, %eax
445 ; AVX1-LABEL: test_v8i32_sext:
447 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
448 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
449 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
450 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
451 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
452 ; AVX1-NEXT: vmovmskps %ymm0, %eax
453 ; AVX1-NEXT: negl %eax
454 ; AVX1-NEXT: sbbl %eax, %eax
455 ; AVX1-NEXT: vzeroupper
458 ; AVX2-LABEL: test_v8i32_sext:
460 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
461 ; AVX2-NEXT: vmovmskps %ymm0, %eax
462 ; AVX2-NEXT: negl %eax
463 ; AVX2-NEXT: sbbl %eax, %eax
464 ; AVX2-NEXT: vzeroupper
467 ; AVX512-LABEL: test_v8i32_sext:
469 ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
470 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
471 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
472 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
473 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
474 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
475 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
476 ; AVX512-NEXT: vmovd %xmm0, %eax
477 ; AVX512-NEXT: vzeroupper
479 %c = icmp sgt <8 x i32> %a0, %a1
480 %s = sext <8 x i1> %c to <8 x i32>
481 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
482 %2 = or <8 x i32> %s, %1
483 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
484 %4 = or <8 x i32> %2, %3
485 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
486 %6 = or <8 x i32> %4, %5
487 %7 = extractelement <8 x i32> %6, i32 0
; Variant of the <8 x i32> any-of reduction in which the compare mask is
; sign-extended to <8 x i16> (a 128-bit vector) rather than to <8 x i32>.
;   %c/%s : signed greater-than, then sign-extension to 0 / -1 per i16 lane.
;   %1..%6 : three shuffle+or steps fold lanes 4..7, then 2,3, then 1 into
;            lane 0.
;   %7 : lane 0 of the result, i.e. the OR of all eight i16 lanes.
;   %8 : that scalar sign-extended from i16 to i32.
; The SSE, AVX1 and AVX2 expectations narrow the compare result with
; packssdw and read it with pmovmskb; the AVX512 expectations compare into
; mask register %k0 and expand it with vpmovm2w.
491 define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
492 ; SSE-LABEL: test_v8i32_legal_sext:
494 ; SSE-NEXT: pcmpgtd %xmm3, %xmm1
495 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0
496 ; SSE-NEXT: packssdw %xmm1, %xmm0
497 ; SSE-NEXT: pmovmskb %xmm0, %eax
498 ; SSE-NEXT: negl %eax
499 ; SSE-NEXT: sbbl %eax, %eax
502 ; AVX1-LABEL: test_v8i32_legal_sext:
504 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
505 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
506 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
507 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
508 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
509 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
510 ; AVX1-NEXT: negl %eax
511 ; AVX1-NEXT: sbbl %eax, %eax
512 ; AVX1-NEXT: vzeroupper
515 ; AVX2-LABEL: test_v8i32_legal_sext:
517 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
518 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
519 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
520 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
521 ; AVX2-NEXT: negl %eax
522 ; AVX2-NEXT: sbbl %eax, %eax
523 ; AVX2-NEXT: vzeroupper
526 ; AVX512-LABEL: test_v8i32_legal_sext:
528 ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
529 ; AVX512-NEXT: vpmovm2w %k0, %xmm0
530 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
531 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
532 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
533 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
534 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
535 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
536 ; AVX512-NEXT: vmovd %xmm0, %eax
538 ; AVX512-NEXT: vzeroupper
540 %c = icmp sgt <8 x i32> %a0, %a1
541 %s = sext <8 x i1> %c to <8 x i16>
542 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
543 %2 = or <8 x i16> %s, %1
544 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
545 %4 = or <8 x i16> %2, %3
546 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
547 %6 = or <8 x i16> %4, %5
548 %7 = extractelement <8 x i16> %6, i32 0
549 %8 = sext i16 %7 to i32
; Any-of reduction over an <8 x i16> integer compare.
;   %c/%s : signed greater-than, then sign-extension to 0 / -1 per i16 lane.
;   %1/%2 : lanes 4..7 OR'ed into lanes 0..3.
;   %3/%4 : lanes 2,3 OR'ed into lanes 0,1.
;   %5/%6 : lane 1 OR'ed into lane 0.
;   %7 : lane 0 of the result, i.e. -1 if any lane compared true, else 0.
; The SSE and AVX expectations use pcmpgtw + pmovmskb + negl/sbbl and then
; take the low 16 bits of %eax (the "kill" annotation lines).
553 define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
554 ; SSE-LABEL: test_v8i16_sext:
556 ; SSE-NEXT: pcmpgtw %xmm1, %xmm0
557 ; SSE-NEXT: pmovmskb %xmm0, %eax
558 ; SSE-NEXT: negl %eax
559 ; SSE-NEXT: sbbl %eax, %eax
560 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
563 ; AVX-LABEL: test_v8i16_sext:
565 ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
566 ; AVX-NEXT: vpmovmskb %xmm0, %eax
567 ; AVX-NEXT: negl %eax
568 ; AVX-NEXT: sbbl %eax, %eax
569 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
572 ; AVX512-LABEL: test_v8i16_sext:
574 ; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
575 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
576 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
577 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
578 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
579 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
580 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
581 ; AVX512-NEXT: vmovd %xmm0, %eax
582 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
584 %c = icmp sgt <8 x i16> %a0, %a1
585 %s = sext <8 x i1> %c to <8 x i16>
586 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
587 %2 = or <8 x i16> %s, %1
588 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
589 %4 = or <8 x i16> %2, %3
590 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
591 %6 = or <8 x i16> %4, %5
592 %7 = extractelement <8 x i16> %6, i32 0
; Any-of reduction over a <16 x i16> integer compare.
;   %c/%s : signed greater-than, then sign-extension to 0 / -1 per i16 lane.
;   %1/%2 : lanes 8..15 OR'ed into lanes 0..7.
;   %3/%4 : lanes 4..7 OR'ed into lanes 0..3.
;   %5/%6 : lanes 2,3 OR'ed into lanes 0,1.
;   %7/%8 : lane 1 OR'ed into lane 0.
;   %9 : lane 0 of the result, i.e. the OR of all sixteen lanes.
; AVX1 and AVX2 have separate expectations: the AVX1 lines compare the two
; 128-bit halves separately and merge them with vpor before vpmovmskb, while
; the AVX2 lines use a 256-bit vpcmpgtw and vpmovmskb directly.
596 define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
597 ; SSE-LABEL: test_v16i16_sext:
599 ; SSE-NEXT: pcmpgtw %xmm3, %xmm1
600 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0
601 ; SSE-NEXT: por %xmm1, %xmm0
602 ; SSE-NEXT: pmovmskb %xmm0, %eax
603 ; SSE-NEXT: negl %eax
604 ; SSE-NEXT: sbbl %eax, %eax
605 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
608 ; AVX1-LABEL: test_v16i16_sext:
610 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
611 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
612 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
613 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
614 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
615 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
616 ; AVX1-NEXT: negl %eax
617 ; AVX1-NEXT: sbbl %eax, %eax
618 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
619 ; AVX1-NEXT: vzeroupper
622 ; AVX2-LABEL: test_v16i16_sext:
624 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
625 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
626 ; AVX2-NEXT: negl %eax
627 ; AVX2-NEXT: sbbl %eax, %eax
628 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
629 ; AVX2-NEXT: vzeroupper
632 ; AVX512-LABEL: test_v16i16_sext:
634 ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
635 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
636 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
637 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
638 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
639 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
640 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
641 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
642 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
643 ; AVX512-NEXT: vmovd %xmm0, %eax
644 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
645 ; AVX512-NEXT: vzeroupper
647 %c = icmp sgt <16 x i16> %a0, %a1
648 %s = sext <16 x i1> %c to <16 x i16>
649 %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
650 %2 = or <16 x i16> %s, %1
651 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
652 %4 = or <16 x i16> %2, %3
653 %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
654 %6 = or <16 x i16> %4, %5
655 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
656 %8 = or <16 x i16> %6, %7
657 %9 = extractelement <16 x i16> %8, i32 0
; Variant of the <16 x i16> any-of reduction in which the compare mask is
; sign-extended to <16 x i8> (a 128-bit vector) rather than to <16 x i16>.
;   %c/%s : signed greater-than, then sign-extension to 0 / -1 per i8 lane.
;   %1..%8 : four shuffle+or steps fold lanes 8..15, then 4..7, then 2,3,
;            then 1 into lane 0.
;   %9 : lane 0 of the result, i.e. the OR of all sixteen i8 lanes.
;   %10 : that scalar sign-extended from i8 to i16.
; The SSE, AVX1 and AVX2 expectations narrow the compare result with
; packsswb and read it with pmovmskb; the AVX512 expectations compare into
; mask register %k0 and expand it with vpmovm2b.
661 define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
662 ; SSE-LABEL: test_v16i16_legal_sext:
664 ; SSE-NEXT: pcmpgtw %xmm3, %xmm1
665 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0
666 ; SSE-NEXT: packsswb %xmm1, %xmm0
667 ; SSE-NEXT: pmovmskb %xmm0, %eax
668 ; SSE-NEXT: negl %eax
669 ; SSE-NEXT: sbbw %ax, %ax
672 ; AVX1-LABEL: test_v16i16_legal_sext:
674 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
675 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
676 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
677 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
678 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
679 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
680 ; AVX1-NEXT: negl %eax
681 ; AVX1-NEXT: sbbw %ax, %ax
682 ; AVX1-NEXT: vzeroupper
685 ; AVX2-LABEL: test_v16i16_legal_sext:
687 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
688 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
689 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
690 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
691 ; AVX2-NEXT: negl %eax
692 ; AVX2-NEXT: sbbw %ax, %ax
693 ; AVX2-NEXT: vzeroupper
696 ; AVX512-LABEL: test_v16i16_legal_sext:
698 ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
699 ; AVX512-NEXT: vpmovm2b %k0, %xmm0
700 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
701 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
702 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
703 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
704 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
705 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
706 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
707 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
708 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax
709 ; AVX512-NEXT: movsbl %al, %eax
710 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
711 ; AVX512-NEXT: vzeroupper
713 %c = icmp sgt <16 x i16> %a0, %a1
714 %s = sext <16 x i1> %c to <16 x i8>
715 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
716 %2 = or <16 x i8> %s, %1
717 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
718 %4 = or <16 x i8> %2, %3
719 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
720 %6 = or <16 x i8> %4, %5
721 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
722 %8 = or <16 x i8> %6, %7
723 %9 = extractelement <16 x i8> %8, i32 0
724 %10 = sext i8 %9 to i16
; Any-of reduction over a <16 x i8> integer compare.
;   %c/%s : signed greater-than, then sign-extension to 0 / -1 per i8 lane.
;   %1/%2 : lanes 8..15 OR'ed into lanes 0..7.
;   %3/%4 : lanes 4..7 OR'ed into lanes 0..3.
;   %5/%6 : lanes 2,3 OR'ed into lanes 0,1.
;   %7/%8 : lane 1 OR'ed into lane 0.
;   %9 : lane 0 of the result, i.e. -1 if any lane compared true, else 0.
; The SSE and AVX expectations use pcmpgtb + pmovmskb + negl/sbbb; the AVX512
; expectations keep the shuffle/shift+or sequence and extract byte 0.
728 define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
729 ; SSE-LABEL: test_v16i8_sext:
731 ; SSE-NEXT: pcmpgtb %xmm1, %xmm0
732 ; SSE-NEXT: pmovmskb %xmm0, %eax
733 ; SSE-NEXT: negl %eax
734 ; SSE-NEXT: sbbb %al, %al
737 ; AVX-LABEL: test_v16i8_sext:
739 ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
740 ; AVX-NEXT: vpmovmskb %xmm0, %eax
741 ; AVX-NEXT: negl %eax
742 ; AVX-NEXT: sbbb %al, %al
745 ; AVX512-LABEL: test_v16i8_sext:
747 ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
748 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
749 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
750 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
751 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
752 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
753 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
754 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
755 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
756 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax
757 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
759 %c = icmp sgt <16 x i8> %a0, %a1
760 %s = sext <16 x i1> %c to <16 x i8>
761 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
762 %2 = or <16 x i8> %s, %1
763 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
764 %4 = or <16 x i8> %2, %3
765 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
766 %6 = or <16 x i8> %4, %5
767 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
768 %8 = or <16 x i8> %6, %7
769 %9 = extractelement <16 x i8> %8, i32 0
773 define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
774 ; SSE-LABEL: test_v32i8_sext:
776 ; SSE-NEXT: pcmpgtb %xmm3, %xmm1
777 ; SSE-NEXT: pcmpgtb %xmm2, %xmm0
778 ; SSE-NEXT: por %xmm1, %xmm0
779 ; SSE-NEXT: pmovmskb %xmm0, %eax
780 ; SSE-NEXT: negl %eax
781 ; SSE-NEXT: sbbb %al, %al
784 ; AVX1-LABEL: test_v32i8_sext:
786 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
787 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
788 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
789 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
790 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
791 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
792 ; AVX1-NEXT: negl %eax
793 ; AVX1-NEXT: sbbb %al, %al
794 ; AVX1-NEXT: vzeroupper
797 ; AVX2-LABEL: test_v32i8_sext:
799 ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
800 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
801 ; AVX2-NEXT: negl %eax
802 ; AVX2-NEXT: sbbb %al, %al
803 ; AVX2-NEXT: vzeroupper
806 ; AVX512-LABEL: test_v32i8_sext:
808 ; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
809 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
810 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
811 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
812 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
813 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
814 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
815 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
816 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
817 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
818 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
819 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax
820 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
821 ; AVX512-NEXT: vzeroupper
823 %c = icmp sgt <32 x i8> %a0, %a1
824 %s = sext <32 x i1> %c to <32 x i8>
825 %1 = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
826 %2 = or <32 x i8> %s, %1
827 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
828 %4 = or <32 x i8> %2, %3
829 %5 = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
830 %6 = or <32 x i8> %4, %5
831 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
832 %8 = or <32 x i8> %6, %7
833 %9 = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
834 %10 = or <32 x i8> %8, %9
835 %11 = extractelement <32 x i8> %10, i32 0
; Any-of reduction performed directly on the <2 x i1> compare result, with no
; sign-extension to a wider lane type.
;   %a : lane-wise ordered greater-than of %x and %y.
;   %b/%c : lane 1 is shuffled down to lane 0 and OR'ed in.
;   %d : lane 0 of the result, i.e. true if either lane compared true.
; The SSE and AVX expectations read the compare with movmskpd and test the
; result for non-zero; the AVX512 expectations compare into mask register %k0
; and test its low two bits (testb $3).
839 define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) {
840 ; SSE-LABEL: bool_reduction_v2f64:
842 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
843 ; SSE-NEXT: movmskpd %xmm1, %eax
844 ; SSE-NEXT: testb %al, %al
845 ; SSE-NEXT: setne %al
848 ; AVX-LABEL: bool_reduction_v2f64:
850 ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
851 ; AVX-NEXT: vmovmskpd %xmm0, %eax
852 ; AVX-NEXT: testb %al, %al
853 ; AVX-NEXT: setne %al
856 ; AVX512-LABEL: bool_reduction_v2f64:
858 ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0
859 ; AVX512-NEXT: kmovd %k0, %eax
860 ; AVX512-NEXT: testb $3, %al
861 ; AVX512-NEXT: setne %al
863 %a = fcmp ogt <2 x double> %x, %y
864 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
865 %c = or <2 x i1> %a, %b
866 %d = extractelement <2 x i1> %c, i32 0
; OR-reduction of a <4 x i1> compare result: `fcmp oeq` on <4 x float>, then two
; shuffle+or halving steps (lanes 2-3 onto 0-1, then lane 1 onto 0) and an
; extract of lane 0 as %d.
; The SSE and AVX runs expect (v)cmpeqps + (v)movmskps + testb/setne; the AVX512
; run expects the compare to write %k0, followed by kmovd and `testb $15`.
870 define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) {
871 ; SSE-LABEL: bool_reduction_v4f32:
873 ; SSE-NEXT: cmpeqps %xmm1, %xmm0
874 ; SSE-NEXT: movmskps %xmm0, %eax
875 ; SSE-NEXT: testb %al, %al
876 ; SSE-NEXT: setne %al
879 ; AVX-LABEL: bool_reduction_v4f32:
881 ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
882 ; AVX-NEXT: vmovmskps %xmm0, %eax
883 ; AVX-NEXT: testb %al, %al
884 ; AVX-NEXT: setne %al
887 ; AVX512-LABEL: bool_reduction_v4f32:
889 ; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0
890 ; AVX512-NEXT: kmovd %k0, %eax
891 ; AVX512-NEXT: testb $15, %al
892 ; AVX512-NEXT: setne %al
894 %a = fcmp oeq <4 x float> %x, %y
895 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
896 %b = or <4 x i1> %s1, %a
897 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
898 %c = or <4 x i1> %s2, %b
899 %d = extractelement <4 x i1> %c, i32 0
; OR-reduction of a <4 x i1> compare result: `fcmp oge` on <4 x double>, then
; two shuffle+or halving steps and an extract of lane 0 as %d.
; The SSE run expects two cmplepd results (one per 128-bit half) narrowed with
; packssdw before movmskps; the AVX run expects a single 256-bit vcmplepd +
; vmovmskpd; the AVX512 run expects a %k0 compare, kmovd and `testb $15`.
903 define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) {
904 ; SSE-LABEL: bool_reduction_v4f64:
906 ; SSE-NEXT: cmplepd %xmm1, %xmm3
907 ; SSE-NEXT: cmplepd %xmm0, %xmm2
908 ; SSE-NEXT: packssdw %xmm3, %xmm2
909 ; SSE-NEXT: movmskps %xmm2, %eax
910 ; SSE-NEXT: testb %al, %al
911 ; SSE-NEXT: setne %al
914 ; AVX-LABEL: bool_reduction_v4f64:
916 ; AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
917 ; AVX-NEXT: vmovmskpd %ymm0, %eax
918 ; AVX-NEXT: testb %al, %al
919 ; AVX-NEXT: setne %al
920 ; AVX-NEXT: vzeroupper
923 ; AVX512-LABEL: bool_reduction_v4f64:
925 ; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k0
926 ; AVX512-NEXT: kmovd %k0, %eax
927 ; AVX512-NEXT: testb $15, %al
928 ; AVX512-NEXT: setne %al
929 ; AVX512-NEXT: vzeroupper
931 %a = fcmp oge <4 x double> %x, %y
932 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
933 %b = or <4 x i1> %s1, %a
934 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
935 %c = or <4 x i1> %s2, %b
936 %d = extractelement <4 x i1> %c, i32 0
; OR-reduction of an <8 x i1> compare result: `fcmp une` on <8 x float>, then
; three shuffle+or halving steps (4, 2, 1 lanes) and an extract of lane 0 as %e.
; The SSE run expects two cmpneqps results narrowed with packssdw + packsswb
; before pmovmskb; the AVX run expects a 256-bit vcmpneqps + vmovmskps; the
; AVX512 run expects a %k0 compare, kmovd and a plain `testb %al, %al`.
940 define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) {
941 ; SSE-LABEL: bool_reduction_v8f32:
943 ; SSE-NEXT: cmpneqps %xmm3, %xmm1
944 ; SSE-NEXT: cmpneqps %xmm2, %xmm0
945 ; SSE-NEXT: packssdw %xmm1, %xmm0
946 ; SSE-NEXT: packsswb %xmm0, %xmm0
947 ; SSE-NEXT: pmovmskb %xmm0, %eax
948 ; SSE-NEXT: testb %al, %al
949 ; SSE-NEXT: setne %al
952 ; AVX-LABEL: bool_reduction_v8f32:
954 ; AVX-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0
955 ; AVX-NEXT: vmovmskps %ymm0, %eax
956 ; AVX-NEXT: testb %al, %al
957 ; AVX-NEXT: setne %al
958 ; AVX-NEXT: vzeroupper
961 ; AVX512-LABEL: bool_reduction_v8f32:
963 ; AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k0
964 ; AVX512-NEXT: kmovd %k0, %eax
965 ; AVX512-NEXT: testb %al, %al
966 ; AVX512-NEXT: setne %al
967 ; AVX512-NEXT: vzeroupper
969 %a = fcmp une <8 x float> %x, %y
970 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
971 %b = or <8 x i1> %s1, %a
972 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
973 %c = or <8 x i1> %s2, %b
974 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
975 %d = or <8 x i1> %s3, %c
976 %e = extractelement <8 x i1> %d, i32 0
; OR-reduction of a <2 x i1> compare result: `icmp ugt` on <2 x i64>, then
; lane 1 is shuffled down and OR'ed into lane 0, which is extracted as %d.
; The SSE and AVX runs expect both operands to be XOR'ed with a splat of
; 9223372036854775808 (2^63, the sign bit) before a signed (v)pcmpgtq, then
; (v)movmskpd + testb/setne. The AVX512 run expects an unsigned vpcmpnleuq into
; %k0, followed by kmovd and `testb $3`.
980 define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
981 ; SSE-LABEL: bool_reduction_v2i64:
983 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
984 ; SSE-NEXT: pxor %xmm2, %xmm1
985 ; SSE-NEXT: pxor %xmm2, %xmm0
986 ; SSE-NEXT: pcmpgtq %xmm1, %xmm0
987 ; SSE-NEXT: movmskpd %xmm0, %eax
988 ; SSE-NEXT: testb %al, %al
989 ; SSE-NEXT: setne %al
992 ; AVX-LABEL: bool_reduction_v2i64:
994 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
995 ; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
996 ; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
997 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
998 ; AVX-NEXT: vmovmskpd %xmm0, %eax
999 ; AVX-NEXT: testb %al, %al
1000 ; AVX-NEXT: setne %al
1003 ; AVX512-LABEL: bool_reduction_v2i64:
1005 ; AVX512-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0
1006 ; AVX512-NEXT: kmovd %k0, %eax
1007 ; AVX512-NEXT: testb $3, %al
1008 ; AVX512-NEXT: setne %al
1010 %a = icmp ugt <2 x i64> %x, %y
1011 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
1012 %c = or <2 x i1> %a, %b
1013 %d = extractelement <2 x i1> %c, i32 0
; OR-reduction of a <4 x i1> compare result: `icmp ne` on <4 x i32>, then two
; shuffle+or halving steps and an extract of lane 0 as %d.
; The SSE and AVX runs expect an equality compare ((v)pcmpeqd) whose
; (v)movmskps result is XOR'ed with 15 (`xorb $15`) before setne, rather than a
; direct not-equal compare. The AVX512 run expects vpcmpneqd into %k0, kmovd and
; `testb $15`.
1017 define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
1018 ; SSE-LABEL: bool_reduction_v4i32:
1020 ; SSE-NEXT: pcmpeqd %xmm1, %xmm0
1021 ; SSE-NEXT: movmskps %xmm0, %eax
1022 ; SSE-NEXT: xorb $15, %al
1023 ; SSE-NEXT: setne %al
1026 ; AVX-LABEL: bool_reduction_v4i32:
1028 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1029 ; AVX-NEXT: vmovmskps %xmm0, %eax
1030 ; AVX-NEXT: xorb $15, %al
1031 ; AVX-NEXT: setne %al
1034 ; AVX512-LABEL: bool_reduction_v4i32:
1036 ; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k0
1037 ; AVX512-NEXT: kmovd %k0, %eax
1038 ; AVX512-NEXT: testb $15, %al
1039 ; AVX512-NEXT: setne %al
1041 %a = icmp ne <4 x i32> %x, %y
1042 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1043 %b = or <4 x i1> %s1, %a
1044 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1045 %c = or <4 x i1> %s2, %b
1046 %d = extractelement <4 x i1> %c, i32 0
; OR-reduction of an <8 x i1> compare result: `icmp slt` on <8 x i16>, then
; three shuffle+or halving steps and an extract of lane 0 as %e.
; The SSE and AVX runs expect (v)pcmpgtw with the operands swapped (slt is
; emitted as a greater-than of %y over %x), a (v)packsswb narrowing to bytes,
; then (v)pmovmskb + testb/setne. The AVX512 run expects vpcmpgtw into %k0,
; kmovd and `testb %al, %al`.
1050 define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) {
1051 ; SSE-LABEL: bool_reduction_v8i16:
1053 ; SSE-NEXT: pcmpgtw %xmm0, %xmm1
1054 ; SSE-NEXT: packsswb %xmm0, %xmm1
1055 ; SSE-NEXT: pmovmskb %xmm1, %eax
1056 ; SSE-NEXT: testb %al, %al
1057 ; SSE-NEXT: setne %al
1060 ; AVX-LABEL: bool_reduction_v8i16:
1062 ; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1063 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
1064 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1065 ; AVX-NEXT: testb %al, %al
1066 ; AVX-NEXT: setne %al
1069 ; AVX512-LABEL: bool_reduction_v8i16:
1071 ; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k0
1072 ; AVX512-NEXT: kmovd %k0, %eax
1073 ; AVX512-NEXT: testb %al, %al
1074 ; AVX512-NEXT: setne %al
1076 %a = icmp slt <8 x i16> %x, %y
1077 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1078 %b = or <8 x i1> %s1, %a
1079 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1080 %c = or <8 x i1> %s2, %b
1081 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1082 %d = or <8 x i1> %s3, %c
1083 %e = extractelement <8 x i1> %d, i32 0
; OR-reduction of a <16 x i1> compare result: `icmp sgt` on <16 x i8>, then
; four shuffle+or halving steps (8, 4, 2, 1 lanes) and an extract of lane 0
; as %f.
; The SSE and AVX runs expect (v)pcmpgtb + (v)pmovmskb and a 16-bit `testw`;
; the AVX512 run expects vpcmpgtb into %k0 tested directly with kortestw.
1087 define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) {
1088 ; SSE-LABEL: bool_reduction_v16i8:
1090 ; SSE-NEXT: pcmpgtb %xmm1, %xmm0
1091 ; SSE-NEXT: pmovmskb %xmm0, %eax
1092 ; SSE-NEXT: testw %ax, %ax
1093 ; SSE-NEXT: setne %al
1096 ; AVX-LABEL: bool_reduction_v16i8:
1098 ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
1099 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1100 ; AVX-NEXT: testw %ax, %ax
1101 ; AVX-NEXT: setne %al
1104 ; AVX512-LABEL: bool_reduction_v16i8:
1106 ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
1107 ; AVX512-NEXT: kortestw %k0, %k0
1108 ; AVX512-NEXT: setne %al
1110 %a = icmp sgt <16 x i8> %x, %y
1111 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1112 %b = or <16 x i1> %s1, %a
1113 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1114 %c = or <16 x i1> %s2, %b
1115 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1116 %d = or <16 x i1> %s3, %c
1117 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1118 %e = or <16 x i1> %s4, %d
1119 %f = extractelement <16 x i1> %e, i32 0
; OR-reduction of a <4 x i1> compare result: `icmp slt` on <4 x i64>, then two
; shuffle+or halving steps and an extract of lane 0 as %d.
; The AVX1 and AVX2 runs have separate expectations here. AVX1 expects the
; compare to be split into two 128-bit vpcmpgtq ops and recombined with
; vinsertf128; AVX2 expects a single 256-bit vpcmpgtq. The SSE run expects two
; pcmpgtq results narrowed with packssdw before movmskps, and the AVX512 run
; expects vpcmpgtq into %k0, kmovd and `testb $15`.
1123 define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
1124 ; SSE-LABEL: bool_reduction_v4i64:
1126 ; SSE-NEXT: pcmpgtq %xmm1, %xmm3
1127 ; SSE-NEXT: pcmpgtq %xmm0, %xmm2
1128 ; SSE-NEXT: packssdw %xmm3, %xmm2
1129 ; SSE-NEXT: movmskps %xmm2, %eax
1130 ; SSE-NEXT: testb %al, %al
1131 ; SSE-NEXT: setne %al
1134 ; AVX1-LABEL: bool_reduction_v4i64:
1136 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1137 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1138 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1139 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
1140 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1141 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
1142 ; AVX1-NEXT: testb %al, %al
1143 ; AVX1-NEXT: setne %al
1144 ; AVX1-NEXT: vzeroupper
1147 ; AVX2-LABEL: bool_reduction_v4i64:
1149 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
1150 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
1151 ; AVX2-NEXT: testb %al, %al
1152 ; AVX2-NEXT: setne %al
1153 ; AVX2-NEXT: vzeroupper
1156 ; AVX512-LABEL: bool_reduction_v4i64:
1158 ; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0
1159 ; AVX512-NEXT: kmovd %k0, %eax
1160 ; AVX512-NEXT: testb $15, %al
1161 ; AVX512-NEXT: setne %al
1162 ; AVX512-NEXT: vzeroupper
1164 %a = icmp slt <4 x i64> %x, %y
1165 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1166 %b = or <4 x i1> %s1, %a
1167 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1168 %c = or <4 x i1> %s2, %b
1169 %d = extractelement <4 x i1> %c, i32 0
; OR-reduction of an <8 x i1> compare result: `icmp ule` on <8 x i32>, then
; three shuffle+or halving steps and an extract of lane 0 as %e.
; The SSE, AVX1 and AVX2 runs expect the unsigned <= to be emitted as
; (v)pminud followed by (v)pcmpeqd against %x (i.e. min(x, y) == x); AVX1 does
; this per 128-bit half and recombines with vinsertf128. The AVX512 run expects
; a direct vpcmpleud into %k0, kmovd and `testb %al, %al`.
1173 define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
1174 ; SSE-LABEL: bool_reduction_v8i32:
1176 ; SSE-NEXT: pminud %xmm1, %xmm3
1177 ; SSE-NEXT: pcmpeqd %xmm1, %xmm3
1178 ; SSE-NEXT: pminud %xmm0, %xmm2
1179 ; SSE-NEXT: pcmpeqd %xmm0, %xmm2
1180 ; SSE-NEXT: packssdw %xmm3, %xmm2
1181 ; SSE-NEXT: packsswb %xmm0, %xmm2
1182 ; SSE-NEXT: pmovmskb %xmm2, %eax
1183 ; SSE-NEXT: testb %al, %al
1184 ; SSE-NEXT: setne %al
1187 ; AVX1-LABEL: bool_reduction_v8i32:
1189 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1190 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1191 ; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
1192 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2
1193 ; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm1
1194 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1195 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1196 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1197 ; AVX1-NEXT: testb %al, %al
1198 ; AVX1-NEXT: setne %al
1199 ; AVX1-NEXT: vzeroupper
1202 ; AVX2-LABEL: bool_reduction_v8i32:
1204 ; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1
1205 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
1206 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1207 ; AVX2-NEXT: testb %al, %al
1208 ; AVX2-NEXT: setne %al
1209 ; AVX2-NEXT: vzeroupper
1212 ; AVX512-LABEL: bool_reduction_v8i32:
1214 ; AVX512-NEXT: vpcmpleud %ymm1, %ymm0, %k0
1215 ; AVX512-NEXT: kmovd %k0, %eax
1216 ; AVX512-NEXT: testb %al, %al
1217 ; AVX512-NEXT: setne %al
1218 ; AVX512-NEXT: vzeroupper
1220 %a = icmp ule <8 x i32> %x, %y
1221 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1222 %b = or <8 x i1> %s1, %a
1223 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1224 %c = or <8 x i1> %s2, %b
1225 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1226 %d = or <8 x i1> %s3, %c
1227 %e = extractelement <8 x i1> %d, i32 0
; OR-reduction of a <16 x i1> compare result: `icmp eq` on <16 x i16>, then
; four shuffle+or halving steps and an extract of lane 0 as %f.
; The SSE, AVX1 and AVX2 runs all expect the word-sized compare results to be
; narrowed to 16 bytes with (v)packsswb so that one (v)pmovmskb + `testw`
; covers every lane; AVX1 compares per 128-bit half, AVX2 compares 256 bits and
; then extracts the upper half. The AVX512 run expects vpcmpeqw into %k0 tested
; with kortestw.
1231 define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
1232 ; SSE-LABEL: bool_reduction_v16i16:
1234 ; SSE-NEXT: pcmpeqw %xmm3, %xmm1
1235 ; SSE-NEXT: pcmpeqw %xmm2, %xmm0
1236 ; SSE-NEXT: packsswb %xmm1, %xmm0
1237 ; SSE-NEXT: pmovmskb %xmm0, %eax
1238 ; SSE-NEXT: testw %ax, %ax
1239 ; SSE-NEXT: setne %al
1242 ; AVX1-LABEL: bool_reduction_v16i16:
1244 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1245 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1246 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2
1247 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
1248 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
1249 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1250 ; AVX1-NEXT: testw %ax, %ax
1251 ; AVX1-NEXT: setne %al
1252 ; AVX1-NEXT: vzeroupper
1255 ; AVX2-LABEL: bool_reduction_v16i16:
1257 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1258 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1259 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1260 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
1261 ; AVX2-NEXT: testw %ax, %ax
1262 ; AVX2-NEXT: setne %al
1263 ; AVX2-NEXT: vzeroupper
1266 ; AVX512-LABEL: bool_reduction_v16i16:
1268 ; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
1269 ; AVX512-NEXT: kortestw %k0, %k0
1270 ; AVX512-NEXT: setne %al
1271 ; AVX512-NEXT: vzeroupper
1273 %a = icmp eq <16 x i16> %x, %y
1274 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1275 %b = or <16 x i1> %s1, %a
1276 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1277 %c = or <16 x i1> %s2, %b
1278 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1279 %d = or <16 x i1> %s3, %c
1280 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1281 %e = or <16 x i1> %s4, %d
1282 %f = extractelement <16 x i1> %e, i32 0
1286 define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
1287 ; SSE-LABEL: bool_reduction_v32i8:
1289 ; SSE-NEXT: pcmpeqb %xmm3, %xmm1
1290 ; SSE-NEXT: pcmpeqb %xmm2, %xmm0
1291 ; SSE-NEXT: por %xmm1, %xmm0
1292 ; SSE-NEXT: pmovmskb %xmm0, %eax
1293 ; SSE-NEXT: testw %ax, %ax
1294 ; SSE-NEXT: setne %al
1297 ; AVX1-LABEL: bool_reduction_v32i8:
1299 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1300 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1301 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2
1302 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1303 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1304 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1305 ; AVX1-NEXT: testw %ax, %ax
1306 ; AVX1-NEXT: setne %al
1307 ; AVX1-NEXT: vzeroupper
1310 ; AVX2-LABEL: bool_reduction_v32i8:
1312 ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
1313 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1314 ; AVX2-NEXT: testl %eax, %eax
1315 ; AVX2-NEXT: setne %al
1316 ; AVX2-NEXT: vzeroupper
1319 ; AVX512-LABEL: bool_reduction_v32i8:
1321 ; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
1322 ; AVX512-NEXT: kortestd %k0, %k0
1323 ; AVX512-NEXT: setne %al
1324 ; AVX512-NEXT: vzeroupper
1326 %a = icmp eq <32 x i8> %x, %y
1327 %s1 = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1328 %b = or <32 x i1> %s1, %a
1329 %s2 = shufflevector <32 x i1> %b, <32 x i1> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1330 %c = or <32 x i1> %s2, %b
1331 %s3 = shufflevector <32 x i1> %c, <32 x i1> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1332 %d = or <32 x i1> %s3, %c
1333 %s4 = shufflevector <32 x i1> %d, <32 x i1> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1334 %e = or <32 x i1> %s4, %d
1335 %s5 = shufflevector <32 x i1> %e, <32 x i1> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1336 %f = or <32 x i1> %s5, %e
1337 %g = extractelement <32 x i1> %f, i32 0