; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=xop | FileCheck %s --check-prefixes=CHECK,XOP
; The condition vector for BLENDV* only cares about the sign bit of each element.
; So in these tests, if we generate BLENDV*, we should be able to remove the redundant cmp op.
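; For example (an illustrative sketch of the v4i32 case tested below, not an
; autogenerated assertion), a pattern like:
;   %tr = icmp slt <4 x i32> %mask, zeroinitializer
;   %z = select <4 x i1> %tr, <4 x i32> %x, <4 x i32> %y
; should lower to a single "vblendvps %xmm2, %xmm0, %xmm1, %xmm0" with no
; preceding vpcmpgtd, because BLENDV already selects on the sign bit of %mask.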
; Test 128-bit vectors for all legal element types.
define <16 x i8> @signbit_sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) {
; CHECK-LABEL: signbit_sel_v16i8:
; CHECK-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
%tr = icmp slt <16 x i8> %mask, zeroinitializer
%z = select <16 x i1> %tr, <16 x i8> %x, <16 x i8> %y
; Sorry 16-bit, you're not important enough to support?
define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) {
; AVX12-LABEL: signbit_sel_v8i16:
; AVX12-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX12-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX12-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX512F-LABEL: signbit_sel_v8i16:
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX512F-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX512VL-LABEL: signbit_sel_v8i16:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX512VL-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0
; XOP-LABEL: signbit_sel_v8i16:
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT: vpcomltw %xmm3, %xmm2, %xmm2
; XOP-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
%tr = icmp slt <8 x i16> %mask, zeroinitializer
%z = select <8 x i1> %tr, <8 x i16> %x, <8 x i16> %y
define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v4i32:
; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX512F-LABEL: signbit_sel_v4i32:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512VL-LABEL: signbit_sel_v4i32:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
; XOP-LABEL: signbit_sel_v4i32:
; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
%tr = icmp slt <4 x i32> %mask, zeroinitializer
%z = select <4 x i1> %tr, <4 x i32> %x, <4 x i32> %y
define <2 x i64> @signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v2i64:
; AVX12-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX512F-LABEL: signbit_sel_v2i64:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512VL-LABEL: signbit_sel_v2i64:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: signbit_sel_v2i64:
; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
%tr = icmp slt <2 x i64> %mask, zeroinitializer
%z = select <2 x i1> %tr, <2 x i64> %x, <2 x i64> %y
define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v4f32:
; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX512F-LABEL: signbit_sel_v4f32:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512VL-LABEL: signbit_sel_v4f32:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: signbit_sel_v4f32:
; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
%tr = icmp slt <4 x i32> %mask, zeroinitializer
%z = select <4 x i1> %tr, <4 x float> %x, <4 x float> %y
define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v2f64:
; AVX12-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX512F-LABEL: signbit_sel_v2f64:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512VL-LABEL: signbit_sel_v2f64:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: signbit_sel_v2f64:
; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
%tr = icmp slt <2 x i64> %mask, zeroinitializer
%z = select <2 x i1> %tr, <2 x double> %x, <2 x double> %y
; Test 256-bit vectors to see differences between AVX1 and AVX2.
define <32 x i8> @signbit_sel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %mask) {
; AVX1-LABEL: signbit_sel_v32i8:
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX2-LABEL: signbit_sel_v32i8:
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512-LABEL: signbit_sel_v32i8:
; AVX512-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; XOP-LABEL: signbit_sel_v32i8:
; XOP-NEXT: vextractf128 $1, %ymm2, %xmm3
; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT: vpcomltb %xmm4, %xmm3, %xmm3
; XOP-NEXT: vpcomltb %xmm4, %xmm2, %xmm2
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; XOP-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
%tr = icmp slt <32 x i8> %mask, zeroinitializer
%z = select <32 x i1> %tr, <32 x i8> %x, <32 x i8> %y
; Sorry 16-bit, you'll never be important enough to support?
define <16 x i16> @signbit_sel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %mask) {
; AVX1-LABEL: signbit_sel_v16i16:
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX2-LABEL: signbit_sel_v16i16:
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512F-LABEL: signbit_sel_v16i16:
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512VL-LABEL: signbit_sel_v16i16:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
; XOP-LABEL: signbit_sel_v16i16:
; XOP-NEXT: vextractf128 $1, %ymm2, %xmm3
; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT: vpcomltw %xmm4, %xmm3, %xmm3
; XOP-NEXT: vpcomltw %xmm4, %xmm2, %xmm2
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; XOP-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
%tr = icmp slt <16 x i16> %mask, zeroinitializer
%z = select <16 x i1> %tr, <16 x i16> %x, <16 x i16> %y
define <8 x i32> @signbit_sel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v8i32:
; AVX12-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX512F-LABEL: signbit_sel_v8i32:
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VL-LABEL: signbit_sel_v8i32:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtd %ymm2, %ymm3, %k1
; AVX512VL-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: signbit_sel_v8i32:
; XOP-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
%tr = icmp slt <8 x i32> %mask, zeroinitializer
%z = select <8 x i1> %tr, <8 x i32> %x, <8 x i32> %y
define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v4i64:
; AVX12-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX512F-LABEL: signbit_sel_v4i64:
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VL-LABEL: signbit_sel_v4i64:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtq %ymm2, %ymm3, %k1
; AVX512VL-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: signbit_sel_v4i64:
; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
%tr = icmp slt <4 x i64> %mask, zeroinitializer
%z = select <4 x i1> %tr, <4 x i64> %x, <4 x i64> %y
define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v4f64:
; AVX12-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX512F-LABEL: signbit_sel_v4f64:
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VL-LABEL: signbit_sel_v4f64:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtq %ymm2, %ymm3, %k1
; AVX512VL-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: signbit_sel_v4f64:
; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
%tr = icmp slt <4 x i64> %mask, zeroinitializer
%z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
; Try a condition with a different type than the select operands.
define <4 x double> @signbit_sel_v4f64_small_mask(<4 x double> %x, <4 x double> %y, <4 x i32> %mask) {
; AVX1-LABEL: signbit_sel_v4f64_small_mask:
; AVX1-NEXT: vpmovsxdq %xmm2, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-LABEL: signbit_sel_v4f64_small_mask:
; AVX2-NEXT: vpmovsxdq %xmm2, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX512F-LABEL: signbit_sel_v4f64_small_mask:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VL-LABEL: signbit_sel_v4f64_small_mask:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: signbit_sel_v4f64_small_mask:
; XOP-NEXT: vpmovsxdq %xmm2, %xmm3
; XOP-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; XOP-NEXT: vpmovsxdq %xmm2, %xmm2
; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
%tr = icmp slt <4 x i32> %mask, zeroinitializer
%z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
; Try a 512-bit vector to make sure AVX-512 is handled as expected.
define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v8f64:
; AVX12-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
; AVX12-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
; AVX512-LABEL: signbit_sel_v8f64:
; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; XOP-LABEL: signbit_sel_v8f64:
; XOP-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
; XOP-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
%tr = icmp slt <8 x i64> %mask, zeroinitializer
%z = select <8 x i1> %tr, <8 x double> %x, <8 x double> %y
; If we have a floating-point compare:
; (2) FIXME: If we don't care about signed-zero (and NaN?), the compare should still get folded.
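; (Illustrative note, not an autogenerated assertion: an input of -0.0 makes
; 'fcmp olt %x, zero' false even though the sign bit of %x is set, so replacing
; the compare with a plain sign-bit select is only equivalent when signed zeros,
; and possibly NaNs, can be ignored.)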
define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x float> %mask) #0 {
; AVX12-LABEL: signbit_sel_v4f32_fcmp:
; AVX12-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX12-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX512F-LABEL: signbit_sel_v4f32_fcmp:
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vcmpltps %zmm2, %zmm0, %k1
; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512VL-LABEL: signbit_sel_v4f32_fcmp:
; AVX512VL-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: vcmpltps %xmm2, %xmm0, %k1
; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: signbit_sel_v4f32_fcmp:
; XOP-NEXT: vxorps %xmm2, %xmm2, %xmm2
; XOP-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
%cmp = fcmp olt <4 x float> %x, zeroinitializer
%sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y
define <4 x i64> @blend_splat1_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
; AVX1-LABEL: blend_splat1_mask_cond_v4i64:
; AVX1-NEXT: vpsllq $63, %xmm0, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-LABEL: blend_splat1_mask_cond_v4i64:
; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX512F-LABEL: blend_splat1_mask_cond_v4i64:
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VL-LABEL: blend_splat1_mask_cond_v4i64:
; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_splat1_mask_cond_v4i64:
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT: vpsllq $63, %xmm3, %xmm3
; XOP-NEXT: vmovddup {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
; XOP-NEXT: # xmm4 = mem[0,0]
; XOP-NEXT: vpshaq %xmm4, %xmm3, %xmm3
; XOP-NEXT: vpsllq $63, %xmm0, %xmm0
; XOP-NEXT: vpshaq %xmm4, %xmm0, %xmm0
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
%a = and <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
%c = icmp eq <4 x i64> %a, zeroinitializer
%r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
define <4 x i32> @blend_splat1_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; AVX12-LABEL: blend_splat1_mask_cond_v4i32:
; AVX12-NEXT: vpslld $31, %xmm0, %xmm0
; AVX12-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-LABEL: blend_splat1_mask_cond_v4i32:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512VL-LABEL: blend_splat1_mask_cond_v4i32:
; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_splat1_mask_cond_v4i32:
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT: vpcomneqd %xmm3, %xmm0, %xmm0
; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
%a = and <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
%c = icmp eq <4 x i32> %a, zeroinitializer
%r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
define <16 x i16> @blend_splat1_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
; AVX1-LABEL: blend_splat1_mask_cond_v16i16:
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm3
; AVX1-NEXT: vpsraw $15, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm1
; AVX1-NEXT: vandps %ymm0, %ymm2, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX2-LABEL: blend_splat1_mask_cond_v16i16:
; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX512F-LABEL: blend_splat1_mask_cond_v16i16:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512VL-LABEL: blend_splat1_mask_cond_v16i16:
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_splat1_mask_cond_v16i16:
; XOP-NEXT: vpsllw $15, %xmm0, %xmm3
; XOP-NEXT: vpsraw $15, %xmm3, %xmm3
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOP-NEXT: vpsllw $15, %xmm0, %xmm0
; XOP-NEXT: vpsraw $15, %xmm0, %xmm0
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0
%a = and <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%c = icmp eq <16 x i16> %a, zeroinitializer
%r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
define <16 x i8> @blend_splat1_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
; AVX12-LABEL: blend_splat1_mask_cond_v16i8:
; AVX12-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-LABEL: blend_splat1_mask_cond_v16i8:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512VL-LABEL: blend_splat1_mask_cond_v16i8:
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_splat1_mask_cond_v16i8:
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT: vpcomneqb %xmm3, %xmm0, %xmm0
; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
%a = and <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%c = icmp eq <16 x i8> %a, zeroinitializer
%r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
define <2 x i64> @blend_splatmax_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
; AVX12-LABEL: blend_splatmax_mask_cond_v2i64:
; AVX12-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-LABEL: blend_splatmax_mask_cond_v2i64:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX512F-NEXT: vptestnmq %zmm3, %zmm0, %k1
; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512VL-LABEL: blend_splatmax_mask_cond_v2i64:
; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
; AVX512VL-NEXT: vpblendmq %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_splatmax_mask_cond_v2i64:
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT: vpcomneqq %xmm3, %xmm0, %xmm0
; XOP-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
%a = and <2 x i64> %x, <i64 9223372036854775808, i64 9223372036854775808>
%c = icmp eq <2 x i64> %a, zeroinitializer
%r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z
define <8 x i32> @blend_splatmax_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX12-LABEL: blend_splatmax_mask_cond_v8i32:
; AVX12-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX512F-LABEL: blend_splatmax_mask_cond_v8i32:
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VL-LABEL: blend_splatmax_mask_cond_v8i32:
; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
; AVX512VL-NEXT: vpblendmd %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_splatmax_mask_cond_v8i32:
; XOP-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
%a = and <8 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
%c = icmp eq <8 x i32> %a, zeroinitializer
%r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z
define <8 x i16> @blend_splatmax_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
; AVX12-LABEL: blend_splatmax_mask_cond_v8i16:
; AVX12-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-LABEL: blend_splatmax_mask_cond_v8i16:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512VL-LABEL: blend_splatmax_mask_cond_v8i16:
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_splatmax_mask_cond_v8i16:
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT: vpcomneqw %xmm3, %xmm0, %xmm0
; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
%a = and <8 x i16> %x, <i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
%c = icmp eq <8 x i16> %a, zeroinitializer
%r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z
define <32 x i8> @blend_splatmax_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z) {
; AVX1-LABEL: blend_splatmax_mask_cond_v32i8:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm1
; AVX1-NEXT: vandps %ymm0, %ymm2, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX2-LABEL: blend_splatmax_mask_cond_v32i8:
; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX512F-LABEL: blend_splatmax_mask_cond_v32i8:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512VL-LABEL: blend_splatmax_mask_cond_v32i8:
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_splatmax_mask_cond_v32i8:
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT: vpcmpgtb %xmm3, %xmm4, %xmm3
; XOP-NEXT: vpcmpgtb %xmm0, %xmm4, %xmm0
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0
%a = and <32 x i8> %x, <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>
%c = icmp eq <32 x i8> %a, zeroinitializer
%r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z
define <4 x i64> @blend_splat_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
; AVX1-LABEL: blend_splat_mask_cond_v4i64:
; AVX1-NEXT: vpsllq $62, %xmm0, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllq $62, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-LABEL: blend_splat_mask_cond_v4i64:
; AVX2-NEXT: vpsllq $62, %ymm0, %ymm0
; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX512F-LABEL: blend_splat_mask_cond_v4i64:
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VL-LABEL: blend_splat_mask_cond_v4i64:
; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_splat_mask_cond_v4i64:
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT: vpsllq $62, %xmm3, %xmm3
; XOP-NEXT: vmovddup {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
; XOP-NEXT: # xmm4 = mem[0,0]
; XOP-NEXT: vpshaq %xmm4, %xmm3, %xmm3
; XOP-NEXT: vpsllq $62, %xmm0, %xmm0
; XOP-NEXT: vpshaq %xmm4, %xmm0, %xmm0
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
%a = and <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2>
%c = icmp eq <4 x i64> %a, zeroinitializer
%r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
define <4 x i32> @blend_splat_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; AVX12-LABEL: blend_splat_mask_cond_v4i32:
; AVX12-NEXT: vpslld $15, %xmm0, %xmm0
; AVX12-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-LABEL: blend_splat_mask_cond_v4i32:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512VL-LABEL: blend_splat_mask_cond_v4i32:
; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_splat_mask_cond_v4i32:
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT: vpcomneqd %xmm3, %xmm0, %xmm0
; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
%a = and <4 x i32> %x, <i32 65536, i32 65536, i32 65536, i32 65536>
%c = icmp eq <4 x i32> %a, zeroinitializer
%r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
define <16 x i16> @blend_splat_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
; AVX1-LABEL: blend_splat_mask_cond_v16i16:
; AVX1-NEXT: vpsllw $5, %xmm0, %xmm3
; AVX1-NEXT: vpsraw $15, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm1
; AVX1-NEXT: vandps %ymm0, %ymm2, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX2-LABEL: blend_splat_mask_cond_v16i16:
; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX512F-LABEL: blend_splat_mask_cond_v16i16:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512VL-LABEL: blend_splat_mask_cond_v16i16:
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_splat_mask_cond_v16i16:
; XOP-NEXT: vpsllw $5, %xmm0, %xmm3
; XOP-NEXT: vpsraw $15, %xmm3, %xmm3
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOP-NEXT: vpsllw $5, %xmm0, %xmm0
; XOP-NEXT: vpsraw $15, %xmm0, %xmm0
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0
%a = and <16 x i16> %x, <i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024>
%c = icmp eq <16 x i16> %a, zeroinitializer
%r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
define <16 x i8> @blend_splat_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
; AVX12-LABEL: blend_splat_mask_cond_v16i8:
; AVX12-NEXT: vpsllw $5, %xmm0, %xmm0
; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-LABEL: blend_splat_mask_cond_v16i8:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512VL-LABEL: blend_splat_mask_cond_v16i8:
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_splat_mask_cond_v16i8:
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT: vpcomneqb %xmm3, %xmm0, %xmm0
; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
%a = and <16 x i8> %x, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%c = icmp eq <16 x i8> %a, zeroinitializer
%r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
define <2 x i64> @blend_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
; AVX1-LABEL: blend_mask_cond_v2i64:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX2-LABEL: blend_mask_cond_v2i64:
; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-LABEL: blend_mask_cond_v2i64:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [1,4]
; AVX512F-NEXT: vptestnmq %zmm3, %zmm0, %k1
; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512VL-LABEL: blend_mask_cond_v2i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
; AVX512VL-NEXT: vpblendmq %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_mask_cond_v2i64:
; XOP-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
%a = and <2 x i64> %x, <i64 1, i64 4>
%c = icmp eq <2 x i64> %a, zeroinitializer
%r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z
define <4 x i32> @blend_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; AVX1-LABEL: blend_mask_cond_v4i32:
; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-LABEL: blend_mask_cond_v4i32:
; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-LABEL: blend_mask_cond_v4i32:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [65536,512,2,1]
; AVX512F-NEXT: vptestnmd %zmm3, %zmm0, %k1
; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512VL-LABEL: blend_mask_cond_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_mask_cond_v4i32:
; XOP-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
%a = and <4 x i32> %x, <i32 65536, i32 512, i32 2, i32 1>
%c = icmp eq <4 x i32> %a, zeroinitializer
%r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
define <8 x i16> @blend_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
; AVX12-LABEL: blend_mask_cond_v8i16:
; AVX12-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX12-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX12-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX12-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512F-LABEL: blend_mask_cond_v8i16:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512F-NEXT: retq
; AVX512VL-LABEL: blend_mask_cond_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_mask_cond_v8i16:
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vpcomltw %xmm3, %xmm0, %xmm0
; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
%a = and <8 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 1024, i16 2, i16 4096>
%c = icmp eq <8 x i16> %a, zeroinitializer
%r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z
define <16 x i8> @blend_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
; AVX12-LABEL: blend_mask_cond_v16i8:
; AVX12-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX12-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX12-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX12-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512F-LABEL: blend_mask_cond_v16i8:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512F-NEXT: retq
; AVX512VL-LABEL: blend_mask_cond_v16i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_mask_cond_v16i8:
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
%a = and <16 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2>
%c = icmp eq <16 x i8> %a, zeroinitializer
%r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
define <4 x i64> @blend_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
; AVX1-LABEL: blend_mask_cond_v4i64:
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqq %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqq %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
; AVX2-LABEL: blend_mask_cond_v4i64:
; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX512F-LABEL: blend_mask_cond_v4i64:
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [2,4,32768,1]
; AVX512F-NEXT: vptestnmq %zmm3, %zmm0, %k1
; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
; AVX512VL-LABEL: blend_mask_cond_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1
; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_mask_cond_v4i64:
; XOP-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOP-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; XOP-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
%a = and <4 x i64> %x, <i64 2, i64 4, i64 32768, i64 1>
%c = icmp eq <4 x i64> %a, zeroinitializer
%r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
define <8 x i32> @blend_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX1-LABEL: blend_mask_cond_v8i32:
; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-LABEL: blend_mask_cond_v8i32:
; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX512F-LABEL: blend_mask_cond_v8i32:
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,8,4,8,1024,2,4096]
; AVX512F-NEXT: vptestnmd %zmm3, %zmm0, %k1
; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
; AVX512VL-LABEL: blend_mask_cond_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1
; AVX512VL-NEXT: vpblendmd %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_mask_cond_v8i32:
; XOP-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOP-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; XOP-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
%a = and <8 x i32> %x, <i32 1, i32 2, i32 8, i32 4, i32 8, i32 1024, i32 2, i32 4096>
%c = icmp eq <8 x i32> %a, zeroinitializer
%r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z
define <16 x i16> @blend_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
; AVX1-LABEL: blend_mask_cond_v16i16:
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vandnps %ymm2, %ymm0, %ymm2
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
; AVX2-LABEL: blend_mask_cond_v16i16:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512F-LABEL: blend_mask_cond_v16i16:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512F-NEXT: retq
; AVX512VL-LABEL: blend_mask_cond_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_mask_cond_v16i16:
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT: vpcomltw %xmm4, %xmm3, %xmm3
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vpcomltw %xmm4, %xmm0, %xmm0
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0
%a = and <16 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 2, i16 2, i16 2, i16 2, i16 8, i16 8, i16 64, i16 64, i16 1024, i16 4096, i16 1024>
%c = icmp eq <16 x i16> %a, zeroinitializer
%r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
define <32 x i8> @blend_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z) {
; AVX1-LABEL: blend_mask_cond_v32i8:
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vandnps %ymm2, %ymm0, %ymm2
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
; AVX2-LABEL: blend_mask_cond_v32i8:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512F-LABEL: blend_mask_cond_v32i8:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512F-NEXT: retq
; AVX512VL-LABEL: blend_mask_cond_v32i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
; AVX512VL-NEXT: retq
; XOP-LABEL: blend_mask_cond_v32i8:
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT: vpcomltb %xmm4, %xmm3, %xmm3
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vpcomltb %xmm4, %xmm0, %xmm0
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0
%a = and <32 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 128, i8 4, i8 2, i8 16>
%c = icmp eq <32 x i8> %a, zeroinitializer
%r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z
define void @PR46531(ptr %x, ptr %y, ptr %z) {
; AVX12-LABEL: PR46531:
; AVX12-NEXT: vmovdqu (%rsi), %xmm0
; AVX12-NEXT: vmovdqu (%rdx), %xmm1
; AVX12-NEXT: vpor %xmm0, %xmm1, %xmm2
; AVX12-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX12-NEXT: vpslld $31, %xmm1, %xmm1
; AVX12-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
; AVX12-NEXT: vmovups %xmm0, (%rdi)
; AVX512F-LABEL: PR46531:
; AVX512F-NEXT: vmovdqu (%rsi), %xmm0
; AVX512F-NEXT: vmovdqu (%rdx), %xmm1
; AVX512F-NEXT: vpor %xmm0, %xmm1, %xmm2
; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %k1
; AVX512F-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: vmovdqu %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512VL-LABEL: PR46531:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqu (%rsi), %xmm0
; AVX512VL-NEXT: vmovdqu (%rdx), %xmm1
; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %k1
; AVX512VL-NEXT: vpxor %xmm0, %xmm1, %xmm2
; AVX512VL-NEXT: vpord %xmm0, %xmm1, %xmm2 {%k1}
; AVX512VL-NEXT: vmovdqu %xmm2, (%rdi)
; AVX512VL-NEXT: retq
; XOP-LABEL: PR46531:
; XOP-NEXT: vmovdqu (%rsi), %xmm0
; XOP-NEXT: vmovdqu (%rdx), %xmm1
; XOP-NEXT: vpor %xmm0, %xmm1, %xmm2
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT: vpcomneqd %xmm4, %xmm3, %xmm3
; XOP-NEXT: vpxor %xmm0, %xmm1, %xmm0
; XOP-NEXT: vblendvps %xmm3, %xmm0, %xmm2, %xmm0
; XOP-NEXT: vmovups %xmm0, (%rdi)
%a = load <4 x i32>, ptr %y, align 4
%b = load <4 x i32>, ptr %z, align 4
%or = or <4 x i32> %b, %a
%and = and <4 x i32> %b, <i32 1, i32 1, i32 1, i32 1>
%cmp = icmp eq <4 x i32> %and, zeroinitializer
%xor = xor <4 x i32> %b, %a
%sel = select <4 x i1> %cmp, <4 x i32> %or, <4 x i32> %xor
store <4 x i32> %sel, ptr %x, align 4
attributes #0 = { "no-nans-fp-math"="true" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: