1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512F
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512DQBW
; Select driven directly by a <4 x i1> argument: the result is zero where the
; condition is true and %x where it is false. The SSE and AVX checks expect the
; i1 lanes to be widened with a shift pair (shift left by 31, arithmetic shift
; right by 31) and applied with an and-not. The AVX512 checks expect the
; condition to be moved into a k-register and used for a zero-masked vmovdqa32.
11 define <4 x i32> @test1(<4 x i1> %cond, <4 x i32> %x) {
14 ; SSE-NEXT: pslld $31, %xmm0
15 ; SSE-NEXT: psrad $31, %xmm0
16 ; SSE-NEXT: pandn %xmm1, %xmm0
21 ; AVX-NEXT: vpslld $31, %xmm0, %xmm0
22 ; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
23 ; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0
26 ; AVX512F-LABEL: test1:
28 ; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
29 ; AVX512F-NEXT: vptestnmd %xmm0, %xmm0, %k1
30 ; AVX512F-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
33 ; AVX512DQBW-LABEL: test1:
34 ; AVX512DQBW: # %bb.0:
35 ; AVX512DQBW-NEXT: vpslld $31, %xmm0, %xmm0
36 ; AVX512DQBW-NEXT: vpmovd2m %xmm0, %k0
37 ; AVX512DQBW-NEXT: knotw %k0, %k1
38 ; AVX512DQBW-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
39 ; AVX512DQBW-NEXT: retq
40 %r = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %x
; Vector select that yields zero where %a == %b (fcmp oeq) and %x elsewhere.
; The checks expect the compare to be emitted in its inverted "not equal" form
; (cmpneqps / vcmpneqps) so that a plain AND, or a zero-masked move with
; AVX512, produces the result without an and-not.
44 define <4 x i32> @test2(<4 x float> %a, <4 x float> %b, <4 x i32> %x) {
47 ; SSE-NEXT: cmpneqps %xmm1, %xmm0
48 ; SSE-NEXT: andps %xmm2, %xmm0
53 ; AVX-NEXT: vcmpneqps %xmm1, %xmm0, %xmm0
54 ; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
57 ; AVX512-LABEL: test2:
59 ; AVX512-NEXT: vcmpneqps %xmm1, %xmm0, %k1
60 ; AVX512-NEXT: vmovdqa32 %xmm2, %xmm0 {%k1} {z}
62 %cond = fcmp oeq <4 x float> %a, %b
63 %r = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %x
; Scalar float select on fcmp oeq whose false value is 0.0. The SSE and AVX
; checks expect a scalar compare followed by a single AND (cmpeqss + andps);
; the AVX512 checks expect a k-register compare and a zero-masked vmovss.
67 define float @fsel_zero_false_val(float %a, float %b, float %x) {
68 ; SSE-LABEL: fsel_zero_false_val:
70 ; SSE-NEXT: cmpeqss %xmm1, %xmm0
71 ; SSE-NEXT: andps %xmm2, %xmm0
74 ; AVX-LABEL: fsel_zero_false_val:
76 ; AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0
77 ; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
80 ; AVX512-LABEL: fsel_zero_false_val:
82 ; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1
83 ; AVX512-NEXT: vmovss %xmm2, %xmm2, %xmm0 {%k1} {z}
85 %cond = fcmp oeq float %a, %b
86 %r = select i1 %cond, float %x, float 0.0
; Scalar float select on fcmp oeq whose true value is 0.0. The SSE and AVX
; checks expect a scalar compare followed by a single and-not (cmpeqss +
; andnps). The AVX512 checks expect zero to be materialized with vxorps and
; merged in with a k-masked vmovss, followed by a register copy.
90 define float @fsel_zero_true_val(float %a, float %b, float %x) {
91 ; SSE-LABEL: fsel_zero_true_val:
93 ; SSE-NEXT: cmpeqss %xmm1, %xmm0
94 ; SSE-NEXT: andnps %xmm2, %xmm0
97 ; AVX-LABEL: fsel_zero_true_val:
99 ; AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0
100 ; AVX-NEXT: vandnps %xmm2, %xmm0, %xmm0
103 ; AVX512-LABEL: fsel_zero_true_val:
105 ; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1
106 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
107 ; AVX512-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1}
108 ; AVX512-NEXT: vmovaps %xmm2, %xmm0
110 %cond = fcmp oeq float %a, %b
111 %r = select i1 %cond, float 0.0, float %x
; Scalar double select on fcmp oeq whose false value is the non-zero constant
; 42.0, so a single AND cannot be used. The SSE checks expect an
; and / and-not / or sequence with the constant loaded from memory, the AVX
; checks expect a vblendvpd, and the AVX512 checks expect the constant to be
; loaded first and %z merged over it with a k-masked vmovsd.
115 define double @fsel_nonzero_false_val(double %x, double %y, double %z) {
116 ; SSE-LABEL: fsel_nonzero_false_val:
118 ; SSE-NEXT: cmpeqsd %xmm1, %xmm0
119 ; SSE-NEXT: andpd %xmm0, %xmm2
120 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
121 ; SSE-NEXT: andnpd %xmm1, %xmm0
122 ; SSE-NEXT: orpd %xmm2, %xmm0
125 ; AVX-LABEL: fsel_nonzero_false_val:
127 ; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0
128 ; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
129 ; AVX-NEXT: # xmm1 = mem[0,0]
130 ; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
133 ; AVX512-LABEL: fsel_nonzero_false_val:
135 ; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1
136 ; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
137 ; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1}
139 %cond = fcmp oeq double %x, %y
140 %r = select i1 %cond, double %z, double 42.0
; Scalar double select on fcmp oeq whose true value is the non-zero constant
; 42.0. The SSE checks expect an and / and-not / or sequence. The AVX checks
; expect a vblendvpd that takes the constant directly as a RIP-relative
; constant-pool operand, and the AVX512 checks expect a k-masked vmovsd load
; from the constant pool merged over %z.
144 define double @fsel_nonzero_true_val(double %x, double %y, double %z) {
145 ; SSE-LABEL: fsel_nonzero_true_val:
147 ; SSE-NEXT: cmpeqsd %xmm1, %xmm0
148 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
149 ; SSE-NEXT: andpd %xmm0, %xmm1
150 ; SSE-NEXT: andnpd %xmm2, %xmm0
151 ; SSE-NEXT: orpd %xmm1, %xmm0
154 ; AVX-LABEL: fsel_nonzero_true_val:
156 ; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0
157 ; AVX-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
160 ; AVX512-LABEL: fsel_nonzero_true_val:
162 ; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1
163 ; AVX512-NEXT: vmovsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 {%k1}
164 ; AVX512-NEXT: vmovapd %xmm2, %xmm0
166 %cond = fcmp oeq double %x, %y
167 %r = select i1 %cond, double 42.0, double %z
; Scalar double select on fcmp oeq where both arms are non-zero constants
; (12.0 if equal, 42.0 otherwise). The SSE checks expect the compare result to
; be moved to a general-purpose register and reduced to one bit (movq +
; andl $1) before a single movsd load.
; NOTE(review): that load is presumably indexed by the bit into a two-entry
; constant table, but the addressing is hidden behind the check regex - confirm
; against the real llc output.
; The AVX checks expect a vblendvpd between the two constants and the AVX512
; checks expect a k-masked constant-pool load over the other constant.
171 define double @fsel_nonzero_constants(double %x, double %y) {
172 ; SSE-LABEL: fsel_nonzero_constants:
174 ; SSE-NEXT: cmpeqsd %xmm1, %xmm0
175 ; SSE-NEXT: movq %xmm0, %rax
176 ; SSE-NEXT: andl $1, %eax
177 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
180 ; AVX-LABEL: fsel_nonzero_constants:
182 ; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0
183 ; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
184 ; AVX-NEXT: # xmm1 = mem[0,0]
185 ; AVX-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
188 ; AVX512-LABEL: fsel_nonzero_constants:
190 ; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1
191 ; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
192 ; AVX512-NEXT: vmovsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1}
194 %cond = fcmp oeq double %x, %y
195 %r = select i1 %cond, double 12.0, double 42.0
; Vector version of the two-constant select: fcmp oge on <2 x double> choosing
; between <12.0, -1.0> and <42.0, 0.0>. The checks expect the oge compare to be
; emitted as a "less or equal" compare with the operands swapped (cmplepd), and
; the false constant to be loaded with movsd (upper lane zero). The blend is an
; and / and-not / or sequence with SSE2, blendvpd with SSE4.2 and AVX, and a
; k-masked vmovapd with AVX512.
199 define <2 x double> @vsel_nonzero_constants(<2 x double> %x, <2 x double> %y) {
200 ; SSE2-LABEL: vsel_nonzero_constants:
202 ; SSE2-NEXT: cmplepd %xmm0, %xmm1
203 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
204 ; SSE2-NEXT: movapd %xmm1, %xmm0
205 ; SSE2-NEXT: andnpd %xmm2, %xmm0
206 ; SSE2-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
207 ; SSE2-NEXT: orpd %xmm1, %xmm0
210 ; SSE42-LABEL: vsel_nonzero_constants:
212 ; SSE42-NEXT: cmplepd %xmm0, %xmm1
213 ; SSE42-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
214 ; SSE42-NEXT: movapd %xmm1, %xmm0
215 ; SSE42-NEXT: blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
216 ; SSE42-NEXT: movapd %xmm2, %xmm0
219 ; AVX-LABEL: vsel_nonzero_constants:
221 ; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
222 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
223 ; AVX-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
226 ; AVX512-LABEL: vsel_nonzero_constants:
228 ; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %k1
229 ; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
230 ; AVX512-NEXT: vmovapd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1}
232 %cond = fcmp oge <2 x double> %x, %y
233 %r = select <2 x i1> %cond, <2 x double> <double 12.0, double -1.0>, <2 x double> <double 42.0, double 0.0>
; Sign-bit mask, 16 x i8: keep %b in lanes where %a is negative
; (icmp slt %a, 0) and produce zero elsewhere. Every run line expects the mask
; to be built with a byte compare against a zeroed register (pxor + pcmpgtb)
; and then ANDed with %b.
237 define <16 x i8> @signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) {
238 ; SSE-LABEL: signbit_mask_v16i8:
240 ; SSE-NEXT: pxor %xmm2, %xmm2
241 ; SSE-NEXT: pcmpgtb %xmm0, %xmm2
242 ; SSE-NEXT: pand %xmm1, %xmm2
243 ; SSE-NEXT: movdqa %xmm2, %xmm0
246 ; AVX-LABEL: signbit_mask_v16i8:
248 ; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
249 ; AVX-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
250 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
253 ; AVX512-LABEL: signbit_mask_v16i8:
255 ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
256 ; AVX512-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
257 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
259 %cond = icmp slt <16 x i8> %a, zeroinitializer
260 %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> zeroinitializer
; Sign-bit mask, 8 x i16: keep %b in lanes where %a is negative and produce
; zero elsewhere. Every run line expects the sign bit to be splatted across
; each lane with an arithmetic shift right by 15, followed by an AND.
264 define <8 x i16> @signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) {
265 ; SSE-LABEL: signbit_mask_v8i16:
267 ; SSE-NEXT: psraw $15, %xmm0
268 ; SSE-NEXT: pand %xmm1, %xmm0
271 ; AVX-LABEL: signbit_mask_v8i16:
273 ; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
274 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
277 ; AVX512-LABEL: signbit_mask_v8i16:
279 ; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0
280 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
282 %cond = icmp slt <8 x i16> %a, zeroinitializer
283 %r = select <8 x i1> %cond, <8 x i16> %b, <8 x i16> zeroinitializer
; Sign-bit mask, 4 x i32: keep %b in lanes where %a is negative and produce
; zero elsewhere. Every run line expects the sign bit to be splatted across
; each lane with an arithmetic shift right by 31, followed by an AND.
287 define <4 x i32> @signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) {
288 ; SSE-LABEL: signbit_mask_v4i32:
290 ; SSE-NEXT: psrad $31, %xmm0
291 ; SSE-NEXT: pand %xmm1, %xmm0
294 ; AVX-LABEL: signbit_mask_v4i32:
296 ; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
297 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
300 ; AVX512-LABEL: signbit_mask_v4i32:
302 ; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
303 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
305 %cond = icmp slt <4 x i32> %a, zeroinitializer
306 %r = select <4 x i1> %cond, <4 x i32> %b, <4 x i32> zeroinitializer
; Sign-bit mask, 2 x i64: keep %b in lanes where %a is negative and produce
; zero elsewhere. The expected mask construction differs per feature level:
; with SSE2 the high dwords are duplicated (pshufd [1,1,3,3]) and shifted with
; psrad by 31; with SSE4.2 and AVX a pcmpgtq against zero is used; with AVX512
; a single 64-bit arithmetic shift (vpsraq by 63) is used. All end with an AND.
310 define <2 x i64> @signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) {
311 ; SSE2-LABEL: signbit_mask_v2i64:
313 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
314 ; SSE2-NEXT: psrad $31, %xmm0
315 ; SSE2-NEXT: pand %xmm1, %xmm0
318 ; SSE42-LABEL: signbit_mask_v2i64:
320 ; SSE42-NEXT: pxor %xmm2, %xmm2
321 ; SSE42-NEXT: pcmpgtq %xmm0, %xmm2
322 ; SSE42-NEXT: pand %xmm1, %xmm2
323 ; SSE42-NEXT: movdqa %xmm2, %xmm0
326 ; AVX-LABEL: signbit_mask_v2i64:
328 ; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
329 ; AVX-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0
330 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
333 ; AVX512-LABEL: signbit_mask_v2i64:
335 ; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0
336 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
338 %cond = icmp slt <2 x i64> %a, zeroinitializer
339 %r = select <2 x i1> %cond, <2 x i64> %b, <2 x i64> zeroinitializer
343 ; Swap the compare predicate and the select operands. This is logically equivalent to the test above.
; Inverted spelling of the 2 x i64 sign-bit mask: the compare is
; icmp sgt %a, -1 and the select returns zero when it is true and %b when it is
; false. The expected instruction sequences are the same as for
; @signbit_mask_v2i64 on every run line.
345 define <2 x i64> @signbit_mask_swap_v2i64(<2 x i64> %a, <2 x i64> %b) {
346 ; SSE2-LABEL: signbit_mask_swap_v2i64:
348 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
349 ; SSE2-NEXT: psrad $31, %xmm0
350 ; SSE2-NEXT: pand %xmm1, %xmm0
353 ; SSE42-LABEL: signbit_mask_swap_v2i64:
355 ; SSE42-NEXT: pxor %xmm2, %xmm2
356 ; SSE42-NEXT: pcmpgtq %xmm0, %xmm2
357 ; SSE42-NEXT: pand %xmm1, %xmm2
358 ; SSE42-NEXT: movdqa %xmm2, %xmm0
361 ; AVX-LABEL: signbit_mask_swap_v2i64:
363 ; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
364 ; AVX-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0
365 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
368 ; AVX512-LABEL: signbit_mask_swap_v2i64:
370 ; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0
371 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
373 %cond = icmp sgt <2 x i64> %a, <i64 -1, i64 -1>
374 %r = select <2 x i1> %cond, <2 x i64> zeroinitializer, <2 x i64> %b
; Sign-bit mask, 32 x i8 (256-bit): keep %b in lanes where %a is negative and
; produce zero elsewhere. The SSE checks expect the work to be done on two
; 128-bit halves. The AVX1 checks expect the compare to be done per 128-bit
; half (vextractf128 / vinsertf128) with a single 256-bit vandps. The AVX2 and
; AVX512 checks expect one 256-bit vpcmpgtb against zero followed by vpand.
378 define <32 x i8> @signbit_mask_v32i8(<32 x i8> %a, <32 x i8> %b) {
379 ; SSE-LABEL: signbit_mask_v32i8:
381 ; SSE-NEXT: pxor %xmm4, %xmm4
382 ; SSE-NEXT: pxor %xmm5, %xmm5
383 ; SSE-NEXT: pcmpgtb %xmm0, %xmm5
384 ; SSE-NEXT: pand %xmm2, %xmm5
385 ; SSE-NEXT: pcmpgtb %xmm1, %xmm4
386 ; SSE-NEXT: pand %xmm3, %xmm4
387 ; SSE-NEXT: movdqa %xmm5, %xmm0
388 ; SSE-NEXT: movdqa %xmm4, %xmm1
391 ; AVX1-LABEL: signbit_mask_v32i8:
393 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
394 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
395 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
396 ; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0
397 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
398 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
401 ; AVX2-LABEL: signbit_mask_v32i8:
403 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
404 ; AVX2-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
405 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
408 ; AVX512-LABEL: signbit_mask_v32i8:
410 ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
411 ; AVX512-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
412 ; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
414 %cond = icmp slt <32 x i8> %a, zeroinitializer
415 %r = select <32 x i1> %cond, <32 x i8> %b, <32 x i8> zeroinitializer
; Sign-bit mask, 16 x i16 (256-bit): keep %b in lanes where %a is negative and
; produce zero elsewhere. The sign bit is expected to be splatted with an
; arithmetic shift right by 15 and then ANDed with %b. The SSE checks expect
; this on two 128-bit halves, the AVX1 checks expect per-half shifts with one
; 256-bit vandps, and the AVX2 and AVX512 checks expect one 256-bit shift.
419 define <16 x i16> @signbit_mask_v16i16(<16 x i16> %a, <16 x i16> %b) {
420 ; SSE-LABEL: signbit_mask_v16i16:
422 ; SSE-NEXT: psraw $15, %xmm0
423 ; SSE-NEXT: pand %xmm2, %xmm0
424 ; SSE-NEXT: psraw $15, %xmm1
425 ; SSE-NEXT: pand %xmm3, %xmm1
428 ; AVX1-LABEL: signbit_mask_v16i16:
430 ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2
431 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
432 ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
433 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
434 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
437 ; AVX2-LABEL: signbit_mask_v16i16:
439 ; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
440 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
443 ; AVX512-LABEL: signbit_mask_v16i16:
445 ; AVX512-NEXT: vpsraw $15, %ymm0, %ymm0
446 ; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
448 %cond = icmp slt <16 x i16> %a, zeroinitializer
449 %r = select <16 x i1> %cond, <16 x i16> %b, <16 x i16> zeroinitializer
; Sign-bit mask, 8 x i32 (256-bit): keep %b in lanes where %a is negative and
; produce zero elsewhere. The sign bit is expected to be splatted with an
; arithmetic shift right by 31 and then ANDed with %b. The SSE checks expect
; this on two 128-bit halves, the AVX1 checks expect per-half shifts with one
; 256-bit vandps, and the AVX2 and AVX512 checks expect one 256-bit shift.
453 define <8 x i32> @signbit_mask_v8i32(<8 x i32> %a, <8 x i32> %b) {
454 ; SSE-LABEL: signbit_mask_v8i32:
456 ; SSE-NEXT: psrad $31, %xmm0
457 ; SSE-NEXT: pand %xmm2, %xmm0
458 ; SSE-NEXT: psrad $31, %xmm1
459 ; SSE-NEXT: pand %xmm3, %xmm1
462 ; AVX1-LABEL: signbit_mask_v8i32:
464 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
465 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
466 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
467 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
468 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
471 ; AVX2-LABEL: signbit_mask_v8i32:
473 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
474 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
477 ; AVX512-LABEL: signbit_mask_v8i32:
479 ; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0
480 ; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
482 %cond = icmp slt <8 x i32> %a, zeroinitializer
483 %r = select <8 x i1> %cond, <8 x i32> %b, <8 x i32> zeroinitializer
487 ; Swap the compare predicate and the select operands. This is logically equivalent to the test above.
; Inverted spelling of the 8 x i32 sign-bit mask: the compare is
; icmp sgt %a, -1 and the select returns zero when it is true and %b when it is
; false. The expected instruction sequences are the same as for
; @signbit_mask_v8i32 on every run line.
489 define <8 x i32> @signbit_mask_swap_v8i32(<8 x i32> %a, <8 x i32> %b) {
490 ; SSE-LABEL: signbit_mask_swap_v8i32:
492 ; SSE-NEXT: psrad $31, %xmm0
493 ; SSE-NEXT: pand %xmm2, %xmm0
494 ; SSE-NEXT: psrad $31, %xmm1
495 ; SSE-NEXT: pand %xmm3, %xmm1
498 ; AVX1-LABEL: signbit_mask_swap_v8i32:
500 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
501 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
502 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
503 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
504 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
507 ; AVX2-LABEL: signbit_mask_swap_v8i32:
509 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
510 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
513 ; AVX512-LABEL: signbit_mask_swap_v8i32:
515 ; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0
516 ; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
518 %cond = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
519 %r = select <8 x i1> %cond, <8 x i32> zeroinitializer, <8 x i32> %b
; Sign-bit mask, 4 x i64 (256-bit): keep %b in lanes where %a is negative and
; produce zero elsewhere. Expected mask construction per feature level: SSE2
; duplicates the high dwords (pshufd [1,1,3,3]) and uses psrad by 31 on each
; half; SSE4.2 uses pcmpgtq against zero on each half; AVX1 uses per-half
; vpcmpgtq with one 256-bit vandps; AVX2 uses one 256-bit vpcmpgtq; AVX512
; uses a single vpsraq by 63.
523 define <4 x i64> @signbit_mask_v4i64(<4 x i64> %a, <4 x i64> %b) {
524 ; SSE2-LABEL: signbit_mask_v4i64:
526 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
527 ; SSE2-NEXT: psrad $31, %xmm0
528 ; SSE2-NEXT: pand %xmm2, %xmm0
529 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
530 ; SSE2-NEXT: psrad $31, %xmm1
531 ; SSE2-NEXT: pand %xmm3, %xmm1
534 ; SSE42-LABEL: signbit_mask_v4i64:
536 ; SSE42-NEXT: pxor %xmm4, %xmm4
537 ; SSE42-NEXT: pxor %xmm5, %xmm5
538 ; SSE42-NEXT: pcmpgtq %xmm0, %xmm5
539 ; SSE42-NEXT: pand %xmm2, %xmm5
540 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm4
541 ; SSE42-NEXT: pand %xmm3, %xmm4
542 ; SSE42-NEXT: movdqa %xmm5, %xmm0
543 ; SSE42-NEXT: movdqa %xmm4, %xmm1
546 ; AVX1-LABEL: signbit_mask_v4i64:
548 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
549 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
550 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
551 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
552 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
553 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
556 ; AVX2-LABEL: signbit_mask_v4i64:
558 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
559 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
560 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
563 ; AVX512-LABEL: signbit_mask_v4i64:
565 ; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0
566 ; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
568 %cond = icmp slt <4 x i64> %a, zeroinitializer
569 %r = select <4 x i1> %cond, <4 x i64> %b, <4 x i64> zeroinitializer
; Sign-bit set-mask, 16 x i8: produce all-ones in lanes where %a is negative
; (icmp slt %a, 0) and %b elsewhere. Every run line expects the mask to be
; built with a byte compare against a zeroed register (pxor + pcmpgtb) and
; then ORed with %b.
573 define <16 x i8> @signbit_setmask_v16i8(<16 x i8> %a, <16 x i8> %b) {
574 ; SSE-LABEL: signbit_setmask_v16i8:
576 ; SSE-NEXT: pxor %xmm2, %xmm2
577 ; SSE-NEXT: pcmpgtb %xmm0, %xmm2
578 ; SSE-NEXT: por %xmm1, %xmm2
579 ; SSE-NEXT: movdqa %xmm2, %xmm0
582 ; AVX-LABEL: signbit_setmask_v16i8:
584 ; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
585 ; AVX-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
586 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
589 ; AVX512-LABEL: signbit_setmask_v16i8:
591 ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
592 ; AVX512-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
593 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
595 %cond = icmp slt <16 x i8> %a, zeroinitializer
596 %r = select <16 x i1> %cond, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %b
600 ; Swap the compare predicate and the select operands. This is logically equivalent to the test above.
; Inverted spelling of the 16 x i8 sign-bit set-mask: the compare is
; icmp sgt %a, -1 and the select returns %b when it is true and all-ones when
; it is false. The expected instruction sequences are the same as for
; @signbit_setmask_v16i8 on every run line.
602 define <16 x i8> @signbit_setmask_swap_v16i8(<16 x i8> %a, <16 x i8> %b) {
603 ; SSE-LABEL: signbit_setmask_swap_v16i8:
605 ; SSE-NEXT: pxor %xmm2, %xmm2
606 ; SSE-NEXT: pcmpgtb %xmm0, %xmm2
607 ; SSE-NEXT: por %xmm1, %xmm2
608 ; SSE-NEXT: movdqa %xmm2, %xmm0
611 ; AVX-LABEL: signbit_setmask_swap_v16i8:
613 ; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
614 ; AVX-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
615 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
618 ; AVX512-LABEL: signbit_setmask_swap_v16i8:
620 ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
621 ; AVX512-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
622 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
624 %cond = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
625 %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; Sign-bit set-mask, 8 x i16: produce all-ones in lanes where %a is negative
; and %b elsewhere. Every run line expects the sign bit to be splatted with an
; arithmetic shift right by 15, followed by an OR with %b.
629 define <8 x i16> @signbit_setmask_v8i16(<8 x i16> %a, <8 x i16> %b) {
630 ; SSE-LABEL: signbit_setmask_v8i16:
632 ; SSE-NEXT: psraw $15, %xmm0
633 ; SSE-NEXT: por %xmm1, %xmm0
636 ; AVX-LABEL: signbit_setmask_v8i16:
638 ; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
639 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
642 ; AVX512-LABEL: signbit_setmask_v8i16:
644 ; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0
645 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
647 %cond = icmp slt <8 x i16> %a, zeroinitializer
648 %r = select <8 x i1> %cond, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %b
; Sign-bit set-mask, 4 x i32: produce all-ones in lanes where %a is negative
; and %b elsewhere. Every run line expects the sign bit to be splatted with an
; arithmetic shift right by 31, followed by an OR with %b.
652 define <4 x i32> @signbit_setmask_v4i32(<4 x i32> %a, <4 x i32> %b) {
653 ; SSE-LABEL: signbit_setmask_v4i32:
655 ; SSE-NEXT: psrad $31, %xmm0
656 ; SSE-NEXT: por %xmm1, %xmm0
659 ; AVX-LABEL: signbit_setmask_v4i32:
661 ; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
662 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
665 ; AVX512-LABEL: signbit_setmask_v4i32:
667 ; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
668 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
670 %cond = icmp slt <4 x i32> %a, zeroinitializer
671 %r = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %b
; Sign-bit set-mask, 2 x i64: produce all-ones in lanes where %a is negative
; and %b elsewhere. Expected mask construction per feature level: SSE2
; duplicates the high dwords (pshufd [1,1,3,3]) and uses psrad by 31; SSE4.2
; and AVX use pcmpgtq against zero; AVX512 uses vpsraq by 63. All end with an
; OR with %b.
675 define <2 x i64> @signbit_setmask_v2i64(<2 x i64> %a, <2 x i64> %b) {
676 ; SSE2-LABEL: signbit_setmask_v2i64:
678 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
679 ; SSE2-NEXT: psrad $31, %xmm0
680 ; SSE2-NEXT: por %xmm1, %xmm0
683 ; SSE42-LABEL: signbit_setmask_v2i64:
685 ; SSE42-NEXT: pxor %xmm2, %xmm2
686 ; SSE42-NEXT: pcmpgtq %xmm0, %xmm2
687 ; SSE42-NEXT: por %xmm1, %xmm2
688 ; SSE42-NEXT: movdqa %xmm2, %xmm0
691 ; AVX-LABEL: signbit_setmask_v2i64:
693 ; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
694 ; AVX-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0
695 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
698 ; AVX512-LABEL: signbit_setmask_v2i64:
700 ; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0
701 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
703 %cond = icmp slt <2 x i64> %a, zeroinitializer
704 %r = select <2 x i1> %cond, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %b
; Sign-bit set-mask, 32 x i8 (256-bit): produce all-ones in lanes where %a is
; negative and %b elsewhere. The SSE checks expect pcmpgtb against zero plus
; por on each 128-bit half. The AVX1 checks expect per-half vpcmpgtb with one
; 256-bit vorps. The AVX2 and AVX512 checks expect one 256-bit vpcmpgtb
; followed by vpor.
708 define <32 x i8> @signbit_setmask_v32i8(<32 x i8> %a, <32 x i8> %b) {
709 ; SSE-LABEL: signbit_setmask_v32i8:
711 ; SSE-NEXT: pxor %xmm4, %xmm4
712 ; SSE-NEXT: pxor %xmm5, %xmm5
713 ; SSE-NEXT: pcmpgtb %xmm0, %xmm5
714 ; SSE-NEXT: por %xmm2, %xmm5
715 ; SSE-NEXT: pcmpgtb %xmm1, %xmm4
716 ; SSE-NEXT: por %xmm3, %xmm4
717 ; SSE-NEXT: movdqa %xmm5, %xmm0
718 ; SSE-NEXT: movdqa %xmm4, %xmm1
721 ; AVX1-LABEL: signbit_setmask_v32i8:
723 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
724 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
725 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
726 ; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0
727 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
728 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
731 ; AVX2-LABEL: signbit_setmask_v32i8:
733 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
734 ; AVX2-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
735 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
738 ; AVX512-LABEL: signbit_setmask_v32i8:
740 ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
741 ; AVX512-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
742 ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
744 %cond = icmp slt <32 x i8> %a, zeroinitializer
745 %r = select <32 x i1> %cond, <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <32 x i8> %b
; Sign-bit set-mask, 16 x i16 (256-bit): produce all-ones in lanes where %a is
; negative and %b elsewhere. The sign bit is expected to be splatted with an
; arithmetic shift right by 15 and then ORed with %b. The SSE checks expect
; this on two 128-bit halves, the AVX1 checks expect per-half shifts with one
; 256-bit vorps, and the AVX2 and AVX512 checks expect one 256-bit shift.
749 define <16 x i16> @signbit_setmask_v16i16(<16 x i16> %a, <16 x i16> %b) {
750 ; SSE-LABEL: signbit_setmask_v16i16:
752 ; SSE-NEXT: psraw $15, %xmm0
753 ; SSE-NEXT: por %xmm2, %xmm0
754 ; SSE-NEXT: psraw $15, %xmm1
755 ; SSE-NEXT: por %xmm3, %xmm1
758 ; AVX1-LABEL: signbit_setmask_v16i16:
760 ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2
761 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
762 ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
763 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
764 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
767 ; AVX2-LABEL: signbit_setmask_v16i16:
769 ; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
770 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
773 ; AVX512-LABEL: signbit_setmask_v16i16:
775 ; AVX512-NEXT: vpsraw $15, %ymm0, %ymm0
776 ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
778 %cond = icmp slt <16 x i16> %a, zeroinitializer
779 %r = select <16 x i1> %cond, <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <16 x i16> %b
; Sign-bit set-mask, 8 x i32 (256-bit): produce all-ones in lanes where %a is
; negative and %b elsewhere. The sign bit is expected to be splatted with an
; arithmetic shift right by 31 and then ORed with %b. The SSE checks expect
; this on two 128-bit halves, the AVX1 checks expect per-half shifts with one
; 256-bit vorps, and the AVX2 and AVX512 checks expect one 256-bit shift.
783 define <8 x i32> @signbit_setmask_v8i32(<8 x i32> %a, <8 x i32> %b) {
784 ; SSE-LABEL: signbit_setmask_v8i32:
786 ; SSE-NEXT: psrad $31, %xmm0
787 ; SSE-NEXT: por %xmm2, %xmm0
788 ; SSE-NEXT: psrad $31, %xmm1
789 ; SSE-NEXT: por %xmm3, %xmm1
792 ; AVX1-LABEL: signbit_setmask_v8i32:
794 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
795 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
796 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
797 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
798 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
801 ; AVX2-LABEL: signbit_setmask_v8i32:
803 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
804 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
807 ; AVX512-LABEL: signbit_setmask_v8i32:
809 ; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0
810 ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
812 %cond = icmp slt <8 x i32> %a, zeroinitializer
813 %r = select <8 x i1> %cond, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> %b
; Sign-bit set-mask, 4 x i64 (256-bit): produce all-ones in lanes where %a is
; negative and %b elsewhere. Expected mask construction per feature level:
; SSE2 duplicates the high dwords (pshufd [1,1,3,3]) and uses psrad by 31 on
; each half; SSE4.2 uses pcmpgtq against zero on each half; AVX1 uses per-half
; vpcmpgtq with one 256-bit vorps; AVX2 uses one 256-bit vpcmpgtq; AVX512 uses
; a single vpsraq by 63. All end with an OR with %b.
817 define <4 x i64> @signbit_setmask_v4i64(<4 x i64> %a, <4 x i64> %b) {
818 ; SSE2-LABEL: signbit_setmask_v4i64:
820 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
821 ; SSE2-NEXT: psrad $31, %xmm0
822 ; SSE2-NEXT: por %xmm2, %xmm0
823 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
824 ; SSE2-NEXT: psrad $31, %xmm1
825 ; SSE2-NEXT: por %xmm3, %xmm1
828 ; SSE42-LABEL: signbit_setmask_v4i64:
830 ; SSE42-NEXT: pxor %xmm4, %xmm4
831 ; SSE42-NEXT: pxor %xmm5, %xmm5
832 ; SSE42-NEXT: pcmpgtq %xmm0, %xmm5
833 ; SSE42-NEXT: por %xmm2, %xmm5
834 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm4
835 ; SSE42-NEXT: por %xmm3, %xmm4
836 ; SSE42-NEXT: movdqa %xmm5, %xmm0
837 ; SSE42-NEXT: movdqa %xmm4, %xmm1
840 ; AVX1-LABEL: signbit_setmask_v4i64:
842 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
843 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
844 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
845 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
846 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
847 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
850 ; AVX2-LABEL: signbit_setmask_v4i64:
852 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
853 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
854 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
857 ; AVX512-LABEL: signbit_setmask_v4i64:
859 ; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0
860 ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
862 %cond = icmp slt <4 x i64> %a, zeroinitializer
863 %r = select <4 x i1> %cond, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i64> %b
867 ; Swap the compare predicate and the select operands. This is logically equivalent to the test above.
; Inverted spelling of the 4 x i64 sign-bit set-mask: the compare is
; icmp sgt %a, -1 and the select returns %b when it is true and all-ones when
; it is false. The expected instruction sequences are the same as for
; @signbit_setmask_v4i64 on every run line.
869 define <4 x i64> @signbit_setmask_swap_v4i64(<4 x i64> %a, <4 x i64> %b) {
870 ; SSE2-LABEL: signbit_setmask_swap_v4i64:
872 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
873 ; SSE2-NEXT: psrad $31, %xmm0
874 ; SSE2-NEXT: por %xmm2, %xmm0
875 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
876 ; SSE2-NEXT: psrad $31, %xmm1
877 ; SSE2-NEXT: por %xmm3, %xmm1
880 ; SSE42-LABEL: signbit_setmask_swap_v4i64:
882 ; SSE42-NEXT: pxor %xmm4, %xmm4
883 ; SSE42-NEXT: pxor %xmm5, %xmm5
884 ; SSE42-NEXT: pcmpgtq %xmm0, %xmm5
885 ; SSE42-NEXT: por %xmm2, %xmm5
886 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm4
887 ; SSE42-NEXT: por %xmm3, %xmm4
888 ; SSE42-NEXT: movdqa %xmm5, %xmm0
889 ; SSE42-NEXT: movdqa %xmm4, %xmm1
892 ; AVX1-LABEL: signbit_setmask_swap_v4i64:
894 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
895 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
896 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
897 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
898 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
899 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
902 ; AVX2-LABEL: signbit_setmask_swap_v4i64:
904 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
905 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
906 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
909 ; AVX512-LABEL: signbit_setmask_swap_v4i64:
911 ; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0
912 ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
914 %cond = icmp sgt <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
915 %r = select <4 x i1> %cond, <4 x i64> %b, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
; Inverted sign-bit mask, 16 x i8: keep %b in lanes where %a is non-negative
; (icmp sgt %a, -1) and produce zero elsewhere. Every run line expects a byte
; compare of %a against an all-ones register (pcmpeqd of a register with
; itself, then pcmpgtb), followed by an AND with %b.
919 define <16 x i8> @not_signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) {
920 ; SSE-LABEL: not_signbit_mask_v16i8:
922 ; SSE-NEXT: pcmpeqd %xmm2, %xmm2
923 ; SSE-NEXT: pcmpgtb %xmm2, %xmm0
924 ; SSE-NEXT: pand %xmm1, %xmm0
927 ; AVX-LABEL: not_signbit_mask_v16i8:
929 ; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
930 ; AVX-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
931 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
934 ; AVX512-LABEL: not_signbit_mask_v16i8:
936 ; AVX512-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
937 ; AVX512-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
938 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
940 %cond = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
941 %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> zeroinitializer
; Inverted sign-bit mask, 8 x i16: keep %b in lanes where %a is non-negative
; (icmp sgt %a, -1) and produce zero elsewhere. Every run line expects the
; sign bit to be splatted with an arithmetic shift right by 15 and then
; applied to %b with an and-not.
945 define <8 x i16> @not_signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) {
946 ; SSE-LABEL: not_signbit_mask_v8i16:
948 ; SSE-NEXT: psraw $15, %xmm0
949 ; SSE-NEXT: pandn %xmm1, %xmm0
952 ; AVX-LABEL: not_signbit_mask_v8i16:
954 ; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
955 ; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0
958 ; AVX512-LABEL: not_signbit_mask_v8i16:
960 ; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0
961 ; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0
963 %cond = icmp sgt <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
964 %r = select <8 x i1> %cond, <8 x i16> %b, <8 x i16> zeroinitializer
968 ; Swap the compare predicate and the select operands. This is logically equivalent to the test above.
; Inverted spelling of the 8 x i16 "not sign-bit" mask: the compare is
; icmp slt %a, 0 and the select returns zero when it is true and %b when it is
; false. The expected instruction sequences are the same as for
; @not_signbit_mask_v8i16 on every run line.
970 define <8 x i16> @not_signbit_mask_swap_v8i16(<8 x i16> %a, <8 x i16> %b) {
971 ; SSE-LABEL: not_signbit_mask_swap_v8i16:
973 ; SSE-NEXT: psraw $15, %xmm0
974 ; SSE-NEXT: pandn %xmm1, %xmm0
977 ; AVX-LABEL: not_signbit_mask_swap_v8i16:
979 ; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
980 ; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0
983 ; AVX512-LABEL: not_signbit_mask_swap_v8i16:
985 ; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0
986 ; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0
988 %cond = icmp slt <8 x i16> %a, zeroinitializer
989 %r = select <8 x i1> %cond, <8 x i16> zeroinitializer, <8 x i16> %b
; Inverted sign-bit mask, 4 x i32: keep %b in lanes where %a is non-negative
; (icmp sgt %a, -1) and produce zero elsewhere. Every run line expects the
; sign bit to be splatted with an arithmetic shift right by 31 and then
; applied to %b with an and-not.
993 define <4 x i32> @not_signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) {
994 ; SSE-LABEL: not_signbit_mask_v4i32:
996 ; SSE-NEXT: psrad $31, %xmm0
997 ; SSE-NEXT: pandn %xmm1, %xmm0
1000 ; AVX-LABEL: not_signbit_mask_v4i32:
1002 ; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
1003 ; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0
1006 ; AVX512-LABEL: not_signbit_mask_v4i32:
1008 ; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
1009 ; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0
1011 %cond = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
1012 %r = select <4 x i1> %cond, <4 x i32> %b, <4 x i32> zeroinitializer
; Inverted sign-bit mask, 2 x i64: keep %b in lanes where %a is non-negative
; (icmp sgt %a, -1) and produce zero elsewhere. Expected code per feature
; level: SSE2 duplicates the high dwords (pshufd [1,1,3,3]) and compares them
; against all-ones with pcmpgtd; SSE4.2 and AVX compare against all-ones with
; pcmpgtq; these end with an AND. AVX512 instead splats the sign bit with
; vpsraq by 63 and applies it with an and-not.
1016 define <2 x i64> @not_signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) {
1017 ; SSE2-LABEL: not_signbit_mask_v2i64:
1019 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1020 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
1021 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
1022 ; SSE2-NEXT: pand %xmm1, %xmm0
1025 ; SSE42-LABEL: not_signbit_mask_v2i64:
1027 ; SSE42-NEXT: pcmpeqd %xmm2, %xmm2
1028 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
1029 ; SSE42-NEXT: pand %xmm1, %xmm0
1032 ; AVX-LABEL: not_signbit_mask_v2i64:
1034 ; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
1035 ; AVX-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
1036 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
1039 ; AVX512-LABEL: not_signbit_mask_v2i64:
1041 ; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0
1042 ; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0
1044 %cond = icmp sgt <2 x i64> %a, <i64 -1, i64 -1>
1045 %r = select <2 x i1> %cond, <2 x i64> %b, <2 x i64> zeroinitializer
; Inverted sign-bit mask, 32 x i8 (256-bit): keep %b in lanes where %a is
; non-negative (icmp sgt %a, -1) and produce zero elsewhere. The SSE checks
; expect a pcmpgtb against an all-ones register plus pand on each 128-bit
; half. The AVX1 checks instead expect the "is negative" mask to be built per
; half (vpcmpgtb against zero) and applied with one 256-bit vandnps. The AVX2
; and AVX512 checks expect one 256-bit compare against all-ones and a vpand.
1049 define <32 x i8> @not_signbit_mask_v32i8(<32 x i8> %a, <32 x i8> %b) {
1050 ; SSE-LABEL: not_signbit_mask_v32i8:
1052 ; SSE-NEXT: pcmpeqd %xmm4, %xmm4
1053 ; SSE-NEXT: pcmpgtb %xmm4, %xmm0
1054 ; SSE-NEXT: pand %xmm2, %xmm0
1055 ; SSE-NEXT: pcmpgtb %xmm4, %xmm1
1056 ; SSE-NEXT: pand %xmm3, %xmm1
1059 ; AVX1-LABEL: not_signbit_mask_v32i8:
1061 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1062 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1063 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
1064 ; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0
1065 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1066 ; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0
1069 ; AVX2-LABEL: not_signbit_mask_v32i8:
1071 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
1072 ; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
1073 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
1076 ; AVX512-LABEL: not_signbit_mask_v32i8:
1078 ; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
1079 ; AVX512-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
1080 ; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
1082 %cond = icmp sgt <32 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1083 %r = select <32 x i1> %cond, <32 x i8> %b, <32 x i8> zeroinitializer
; Inverted sign-bit mask, 16 x i16 (256-bit): keep %b in lanes where %a is
; non-negative (icmp sgt %a, -1) and produce zero elsewhere. The sign bit is
; expected to be splatted with an arithmetic shift right by 15 and applied to
; %b with an and-not. The SSE checks expect this on two 128-bit halves, the
; AVX1 checks expect per-half shifts with one 256-bit vandnps, and the AVX2
; and AVX512 checks expect one 256-bit shift.
1087 define <16 x i16> @not_signbit_mask_v16i16(<16 x i16> %a, <16 x i16> %b) {
1088 ; SSE-LABEL: not_signbit_mask_v16i16:
1090 ; SSE-NEXT: psraw $15, %xmm0
1091 ; SSE-NEXT: pandn %xmm2, %xmm0
1092 ; SSE-NEXT: psraw $15, %xmm1
1093 ; SSE-NEXT: pandn %xmm3, %xmm1
1096 ; AVX1-LABEL: not_signbit_mask_v16i16:
1098 ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2
1099 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1100 ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
1101 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1102 ; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0
1105 ; AVX2-LABEL: not_signbit_mask_v16i16:
1107 ; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
1108 ; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
1111 ; AVX512-LABEL: not_signbit_mask_v16i16:
1113 ; AVX512-NEXT: vpsraw $15, %ymm0, %ymm0
1114 ; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0
1116 %cond = icmp sgt <16 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1117 %r = select <16 x i1> %cond, <16 x i16> %b, <16 x i16> zeroinitializer
; Inverted sign-bit mask, 8 x i32 (256-bit): keep %b in lanes where %a is
; non-negative (icmp sgt %a, -1) and produce zero elsewhere. The sign bit is
; expected to be splatted with an arithmetic shift right by 31 and applied to
; %b with an and-not. The SSE checks expect this on two 128-bit halves, the
; AVX1 checks expect per-half shifts with one 256-bit vandnps, and the AVX2
; and AVX512 checks expect one 256-bit shift.
1121 define <8 x i32> @not_signbit_mask_v8i32(<8 x i32> %a, <8 x i32> %b) {
1122 ; SSE-LABEL: not_signbit_mask_v8i32:
1124 ; SSE-NEXT: psrad $31, %xmm0
1125 ; SSE-NEXT: pandn %xmm2, %xmm0
1126 ; SSE-NEXT: psrad $31, %xmm1
1127 ; SSE-NEXT: pandn %xmm3, %xmm1
1130 ; AVX1-LABEL: not_signbit_mask_v8i32:
1132 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
1133 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1134 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
1135 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1136 ; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0
1139 ; AVX2-LABEL: not_signbit_mask_v8i32:
1141 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
1142 ; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
1145 ; AVX512-LABEL: not_signbit_mask_v8i32:
1147 ; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0
1148 ; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0
1150 %cond = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
1151 %r = select <8 x i1> %cond, <8 x i32> %b, <8 x i32> zeroinitializer
1155 ; Swap the compare predicate and the select operands. This is logically equivalent to the test above.
; Inverted spelling of the 8 x i32 "not sign-bit" mask: the compare is
; icmp slt %a, 0 and the select returns zero when it is true and %b when it is
; false. The expected instruction sequences are the same as for
; @not_signbit_mask_v8i32 on every run line.
1157 define <8 x i32> @not_signbit_mask_swap_v8i32(<8 x i32> %a, <8 x i32> %b) {
1158 ; SSE-LABEL: not_signbit_mask_swap_v8i32:
1160 ; SSE-NEXT: psrad $31, %xmm0
1161 ; SSE-NEXT: pandn %xmm2, %xmm0
1162 ; SSE-NEXT: psrad $31, %xmm1
1163 ; SSE-NEXT: pandn %xmm3, %xmm1
1166 ; AVX1-LABEL: not_signbit_mask_swap_v8i32:
1168 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
1169 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1170 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
1171 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1172 ; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0
1175 ; AVX2-LABEL: not_signbit_mask_swap_v8i32:
1177 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
1178 ; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
1181 ; AVX512-LABEL: not_signbit_mask_swap_v8i32:
1183 ; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0
1184 ; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0
1186 %cond = icmp slt <8 x i32> %a, zeroinitializer
1187 %r = select <8 x i1> %cond, <8 x i32> zeroinitializer, <8 x i32> %b
1191 define <4 x i64> @not_signbit_mask_v4i64(<4 x i64> %a, <4 x i64> %b) {
1192 ; SSE2-LABEL: not_signbit_mask_v4i64:
1194 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1195 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
1196 ; SSE2-NEXT: pcmpgtd %xmm4, %xmm0
1197 ; SSE2-NEXT: pand %xmm2, %xmm0
1198 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1199 ; SSE2-NEXT: pcmpgtd %xmm4, %xmm1
1200 ; SSE2-NEXT: pand %xmm3, %xmm1
1203 ; SSE42-LABEL: not_signbit_mask_v4i64:
1205 ; SSE42-NEXT: pcmpeqd %xmm4, %xmm4
1206 ; SSE42-NEXT: pcmpgtq %xmm4, %xmm0
1207 ; SSE42-NEXT: pand %xmm2, %xmm0
1208 ; SSE42-NEXT: pcmpgtq %xmm4, %xmm1
1209 ; SSE42-NEXT: pand %xmm3, %xmm1
1212 ; AVX1-LABEL: not_signbit_mask_v4i64:
1214 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1215 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1216 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1217 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
1218 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1219 ; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0
1222 ; AVX2-LABEL: not_signbit_mask_v4i64:
1224 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
1225 ; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
1226 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
1229 ; AVX512-LABEL: not_signbit_mask_v4i64:
1231 ; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0
1232 ; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0
1234 %cond = icmp sgt <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
1235 %r = select <4 x i1> %cond, <4 x i64> %b, <4 x i64> zeroinitializer