1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX12,AVX1
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX12,AVX2
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512VL
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=xop | FileCheck %s --check-prefixes=CHECK,XOP
8 ; The condition vector for BLENDV* only cares about the sign bit of each element.
9 ; So in these tests, if we generate BLENDV*, we should be able to remove the redundant cmp op.
11 ; Test 128-bit vectors for all legal element types.
; Every subtarget has a byte-granularity blendv (vpblendvb), so the sign-bit
; select compiles to the single blend with no preceding compare.
13 define <16 x i8> @signbit_sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) {
14 ; CHECK-LABEL: signbit_sel_v16i8:
16 ; CHECK-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
18 %tr = icmp slt <16 x i8> %mask, zeroinitializer
19 %z = select <16 x i1> %tr, <16 x i8> %x, <16 x i8> %y
23 ; Sorry 16-bit, you're not important enough to support? (There is no 16-bit
; blendv instruction, so the sign-bit compare cannot be eliminated here.)
; No 16-bit blendv form exists, so the sign mask must be materialized first:
; vpcmpgtw against zero (XOP: vpcomltw) feeding vpblendvb. AVX512VL instead
; folds the select into vpternlogq.
25 define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) {
26 ; AVX12-LABEL: signbit_sel_v8i16:
28 ; AVX12-NEXT: vpxor %xmm3, %xmm3, %xmm3
29 ; AVX12-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
30 ; AVX12-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
33 ; AVX512F-LABEL: signbit_sel_v8i16:
35 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
36 ; AVX512F-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
37 ; AVX512F-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
40 ; AVX512VL-LABEL: signbit_sel_v8i16:
42 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
43 ; AVX512VL-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
44 ; AVX512VL-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0
47 ; XOP-LABEL: signbit_sel_v8i16:
49 ; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
50 ; XOP-NEXT: vpcomltw %xmm3, %xmm2, %xmm2
51 ; XOP-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
53 %tr = icmp slt <8 x i16> %mask, zeroinitializer
54 %z = select <8 x i1> %tr, <8 x i16> %x, <8 x i16> %y
; AVX/XOP: vblendvps consumes the sign bit directly, so the compare is gone.
; AVX512F has no xmm-width mask-register blend, so it widens to zmm, compares
; into k1 and uses vpblendmd; AVX512VL does the same at xmm width.
58 define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
59 ; AVX12-LABEL: signbit_sel_v4i32:
61 ; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
64 ; AVX512F-LABEL: signbit_sel_v4i32:
66 ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
67 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
68 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
69 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
70 ; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
71 ; AVX512F-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
72 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
73 ; AVX512F-NEXT: vzeroupper
76 ; AVX512VL-LABEL: signbit_sel_v4i32:
78 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
79 ; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1
80 ; AVX512VL-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
83 ; XOP-LABEL: signbit_sel_v4i32:
85 ; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
87 %tr = icmp slt <4 x i32> %mask, zeroinitializer
88 %z = select <4 x i1> %tr, <4 x i32> %x, <4 x i32> %y
; Same as the i32 case at 64-bit granularity: vblendvpd with no compare on
; AVX/XOP; AVX512F widens to zmm and uses vpcmpgtq/vpblendmq, AVX512VL stays
; at xmm width.
92 define <2 x i64> @signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) {
93 ; AVX12-LABEL: signbit_sel_v2i64:
95 ; AVX12-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
98 ; AVX512F-LABEL: signbit_sel_v2i64:
100 ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
101 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
102 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
103 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
104 ; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
105 ; AVX512F-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
106 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
107 ; AVX512F-NEXT: vzeroupper
110 ; AVX512VL-LABEL: signbit_sel_v2i64:
112 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
113 ; AVX512VL-NEXT: vpcmpgtq %xmm2, %xmm3, %k1
114 ; AVX512VL-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
115 ; AVX512VL-NEXT: retq
117 ; XOP-LABEL: signbit_sel_v2i64:
119 ; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
121 %tr = icmp slt <2 x i64> %mask, zeroinitializer
122 %z = select <2 x i1> %tr, <2 x i64> %x, <2 x i64> %y
; Float select driven by an integer sign-bit mask: AVX/XOP use vblendvps
; directly; AVX512 compares the i32 mask into k1 and blends with vblendmps.
126 define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32> %mask) {
127 ; AVX12-LABEL: signbit_sel_v4f32:
129 ; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
132 ; AVX512F-LABEL: signbit_sel_v4f32:
134 ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
135 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
136 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
137 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
138 ; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
139 ; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
140 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
141 ; AVX512F-NEXT: vzeroupper
144 ; AVX512VL-LABEL: signbit_sel_v4f32:
146 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
147 ; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1
148 ; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
149 ; AVX512VL-NEXT: retq
151 ; XOP-LABEL: signbit_sel_v4f32:
153 ; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
155 %tr = icmp slt <4 x i32> %mask, zeroinitializer
156 %z = select <4 x i1> %tr, <4 x float> %x, <4 x float> %y
; Double select with an i64 sign-bit mask: vblendvpd on AVX/XOP with no
; compare; AVX512 uses vpcmpgtq into k1 plus vblendmpd.
160 define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i64> %mask) {
161 ; AVX12-LABEL: signbit_sel_v2f64:
163 ; AVX12-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
166 ; AVX512F-LABEL: signbit_sel_v2f64:
168 ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
169 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
170 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
171 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
172 ; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
173 ; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
174 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
175 ; AVX512F-NEXT: vzeroupper
178 ; AVX512VL-LABEL: signbit_sel_v2f64:
180 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
181 ; AVX512VL-NEXT: vpcmpgtq %xmm2, %xmm3, %k1
182 ; AVX512VL-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
183 ; AVX512VL-NEXT: retq
185 ; XOP-LABEL: signbit_sel_v2f64:
187 ; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
189 %tr = icmp slt <2 x i64> %mask, zeroinitializer
190 %z = select <2 x i1> %tr, <2 x double> %x, <2 x double> %y
194 ; Test 256-bit vectors to see differences between AVX1 and AVX2.
; AVX1 has no 256-bit integer blendv, so it compares each xmm half against
; zero and blends with andnps/andps/orps; AVX2/AVX512 use one ymm vpblendvb
; with the compare removed; XOP compares per-half and selects via vpcmov.
196 define <32 x i8> @signbit_sel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %mask) {
197 ; AVX1-LABEL: signbit_sel_v32i8:
199 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
200 ; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
201 ; AVX1-NEXT: vpcmpgtb %xmm3, %xmm4, %xmm3
202 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm4, %xmm2
203 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
204 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
205 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
206 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
209 ; AVX2-LABEL: signbit_sel_v32i8:
211 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
214 ; AVX512-LABEL: signbit_sel_v32i8:
216 ; AVX512-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
219 ; XOP-LABEL: signbit_sel_v32i8:
221 ; XOP-NEXT: vextractf128 $1, %ymm2, %xmm3
222 ; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
223 ; XOP-NEXT: vpcomltb %xmm4, %xmm3, %xmm3
224 ; XOP-NEXT: vpcomltb %xmm4, %xmm2, %xmm2
225 ; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
226 ; XOP-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
228 %tr = icmp slt <32 x i8> %mask, zeroinitializer
229 %z = select <32 x i1> %tr, <32 x i8> %x, <32 x i8> %y
233 ; Sorry 16-bit, you'll never be important enough to support? (Again: no
; 16-bit blendv form exists, so the compare remains at 256 bits too.)
; 16-bit at 256 bits: every target still materializes the sign compare.
; AVX1/XOP split into xmm halves; AVX2/AVX512F compare at ymm width before
; vpblendvb; AVX512VL folds the select into vpternlogq.
235 define <16 x i16> @signbit_sel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %mask) {
236 ; AVX1-LABEL: signbit_sel_v16i16:
238 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
239 ; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
240 ; AVX1-NEXT: vpcmpgtw %xmm3, %xmm4, %xmm3
241 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm4, %xmm2
242 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
243 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
244 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
245 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
248 ; AVX2-LABEL: signbit_sel_v16i16:
250 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
251 ; AVX2-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
252 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
255 ; AVX512F-LABEL: signbit_sel_v16i16:
257 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
258 ; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
259 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
262 ; AVX512VL-LABEL: signbit_sel_v16i16:
264 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
265 ; AVX512VL-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
266 ; AVX512VL-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0
267 ; AVX512VL-NEXT: retq
269 ; XOP-LABEL: signbit_sel_v16i16:
271 ; XOP-NEXT: vextractf128 $1, %ymm2, %xmm3
272 ; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
273 ; XOP-NEXT: vpcomltw %xmm4, %xmm3, %xmm3
274 ; XOP-NEXT: vpcomltw %xmm4, %xmm2, %xmm2
275 ; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
276 ; XOP-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
278 %tr = icmp slt <16 x i16> %mask, zeroinitializer
279 %z = select <16 x i1> %tr, <16 x i16> %x, <16 x i16> %y
; 256-bit i32: single vblendvps on AVX/XOP (compare removed); AVX512F widens
; to zmm for the k-register compare/blend, AVX512VL stays at ymm width.
283 define <8 x i32> @signbit_sel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask) {
284 ; AVX12-LABEL: signbit_sel_v8i32:
286 ; AVX12-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
289 ; AVX512F-LABEL: signbit_sel_v8i32:
291 ; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
292 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
293 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
294 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
295 ; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
296 ; AVX512F-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
297 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
300 ; AVX512VL-LABEL: signbit_sel_v8i32:
302 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
303 ; AVX512VL-NEXT: vpcmpgtd %ymm2, %ymm3, %k1
304 ; AVX512VL-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
305 ; AVX512VL-NEXT: retq
307 ; XOP-LABEL: signbit_sel_v8i32:
309 ; XOP-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
311 %tr = icmp slt <8 x i32> %mask, zeroinitializer
312 %z = select <8 x i1> %tr, <8 x i32> %x, <8 x i32> %y
; 256-bit i64: single vblendvpd on AVX/XOP; AVX512F widens to zmm for
; vpcmpgtq/vpblendmq, AVX512VL stays at ymm width.
316 define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) {
317 ; AVX12-LABEL: signbit_sel_v4i64:
319 ; AVX12-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
322 ; AVX512F-LABEL: signbit_sel_v4i64:
324 ; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
325 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
326 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
327 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
328 ; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
329 ; AVX512F-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
330 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
333 ; AVX512VL-LABEL: signbit_sel_v4i64:
335 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
336 ; AVX512VL-NEXT: vpcmpgtq %ymm2, %ymm3, %k1
337 ; AVX512VL-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
338 ; AVX512VL-NEXT: retq
340 ; XOP-LABEL: signbit_sel_v4i64:
342 ; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
344 %tr = icmp slt <4 x i64> %mask, zeroinitializer
345 %z = select <4 x i1> %tr, <4 x i64> %x, <4 x i64> %y
; 256-bit double select with i64 sign-bit mask: same pattern as v4i64 but
; blending with vblendvpd / vblendmpd.
349 define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i64> %mask) {
350 ; AVX12-LABEL: signbit_sel_v4f64:
352 ; AVX12-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
355 ; AVX512F-LABEL: signbit_sel_v4f64:
357 ; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
358 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
359 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
360 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
361 ; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
362 ; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
363 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
366 ; AVX512VL-LABEL: signbit_sel_v4f64:
368 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
369 ; AVX512VL-NEXT: vpcmpgtq %ymm2, %ymm3, %k1
370 ; AVX512VL-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
371 ; AVX512VL-NEXT: retq
373 ; XOP-LABEL: signbit_sel_v4f64:
375 ; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
377 %tr = icmp slt <4 x i64> %mask, zeroinitializer
378 %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
382 ; Try a condition with a different type than the select operands.
; Condition type (<4 x i32>) is narrower than the select elements (f64), so
; the mask sign bits must be sign-extended to 64-bit lanes (vpmovsxdq; AVX1/
; XOP do it per-half) before vblendvpd. AVX512 instead compares the i32 mask
; into k1 and blends at 64-bit granularity directly.
384 define <4 x double> @signbit_sel_v4f64_small_mask(<4 x double> %x, <4 x double> %y, <4 x i32> %mask) {
385 ; AVX1-LABEL: signbit_sel_v4f64_small_mask:
387 ; AVX1-NEXT: vpmovsxdq %xmm2, %xmm3
388 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
389 ; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2
390 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
391 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
394 ; AVX2-LABEL: signbit_sel_v4f64_small_mask:
396 ; AVX2-NEXT: vpmovsxdq %xmm2, %ymm2
397 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
400 ; AVX512F-LABEL: signbit_sel_v4f64_small_mask:
402 ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
403 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
404 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
405 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
406 ; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
407 ; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
408 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
411 ; AVX512VL-LABEL: signbit_sel_v4f64_small_mask:
413 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
414 ; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1
415 ; AVX512VL-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
416 ; AVX512VL-NEXT: retq
418 ; XOP-LABEL: signbit_sel_v4f64_small_mask:
420 ; XOP-NEXT: vpmovsxdq %xmm2, %xmm3
421 ; XOP-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
422 ; XOP-NEXT: vpmovsxdq %xmm2, %xmm2
423 ; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
424 ; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
426 %tr = icmp slt <4 x i32> %mask, zeroinitializer
427 %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
431 ; Try a 512-bit vector to make sure AVX-512 is handled as expected.
; 512-bit: AVX/XOP split into two 256-bit vblendvpd ops; AVX512 handles the
; whole vector with one compare into k1 and one masked vblendmpd.
433 define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i64> %mask) {
434 ; AVX12-LABEL: signbit_sel_v8f64:
436 ; AVX12-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
437 ; AVX12-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
440 ; AVX512-LABEL: signbit_sel_v8f64:
442 ; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
443 ; AVX512-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
444 ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
447 ; XOP-LABEL: signbit_sel_v8f64:
449 ; XOP-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
450 ; XOP-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
452 %tr = icmp slt <8 x i64> %mask, zeroinitializer
453 %z = select <8 x i1> %tr, <8 x double> %x, <8 x double> %y
457 ; If we have a floating-point compare:
459 ; (2) FIXME: If we don't care about signed-zero (and NaN?), the compare should still get folded.
; fcmp olt against zero is NOT folded away: an FP "< 0.0" is not the same as
; a sign-bit test (see the FIXME above re: signed zero / NaN), so the
; vcmpltps remains in front of the blend on every target.
461 define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x float> %mask) #0 {
462 ; AVX12-LABEL: signbit_sel_v4f32_fcmp:
464 ; AVX12-NEXT: vxorps %xmm2, %xmm2, %xmm2
465 ; AVX12-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
466 ; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
469 ; AVX512F-LABEL: signbit_sel_v4f32_fcmp:
471 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
472 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
473 ; AVX512F-NEXT: vxorps %xmm2, %xmm2, %xmm2
474 ; AVX512F-NEXT: vcmpltps %zmm2, %zmm0, %k1
475 ; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
476 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
477 ; AVX512F-NEXT: vzeroupper
480 ; AVX512VL-LABEL: signbit_sel_v4f32_fcmp:
482 ; AVX512VL-NEXT: vxorps %xmm2, %xmm2, %xmm2
483 ; AVX512VL-NEXT: vcmpltps %xmm2, %xmm0, %k1
484 ; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
485 ; AVX512VL-NEXT: retq
487 ; XOP-LABEL: signbit_sel_v4f32_fcmp:
489 ; XOP-NEXT: vxorps %xmm2, %xmm2, %xmm2
490 ; XOP-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
491 ; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
493 %cmp = fcmp olt <4 x float> %x, zeroinitializer
494 %sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y
; Condition tests bit 0 of each element (and with splat 1, icmp eq 0).
; AVX shifts the bit into the sign position (vpsllq $63) and lets blendv read
; it (operands swapped to account for the eq-zero inversion); AVX512 tests
; the bit directly with vptestnmq into k1; XOP additionally splats the sign
; with an arithmetic shift (vpshaq by -63).
498 define <4 x i64> @blend_splat1_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
499 ; AVX1-LABEL: blend_splat1_mask_cond_v4i64:
501 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm3
502 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
503 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
504 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
505 ; AVX1-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
508 ; AVX2-LABEL: blend_splat1_mask_cond_v4i64:
510 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
511 ; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
514 ; AVX512F-LABEL: blend_splat1_mask_cond_v4i64:
516 ; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
517 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
518 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
519 ; AVX512F-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
520 ; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
521 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
524 ; AVX512VL-LABEL: blend_splat1_mask_cond_v4i64:
526 ; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
527 ; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
528 ; AVX512VL-NEXT: retq
530 ; XOP-LABEL: blend_splat1_mask_cond_v4i64:
532 ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
533 ; XOP-NEXT: vpsllq $63, %xmm3, %xmm3
534 ; XOP-NEXT: vpmovsxbq {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
535 ; XOP-NEXT: vpshaq %xmm4, %xmm3, %xmm3
536 ; XOP-NEXT: vpsllq $63, %xmm0, %xmm0
537 ; XOP-NEXT: vpshaq %xmm4, %xmm0, %xmm0
538 ; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
539 ; XOP-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
541 %a = and <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
542 %c = icmp eq <4 x i64> %a, zeroinitializer
543 %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
; Bit-0 test on i32 lanes: AVX shifts bit 0 into the sign bit (vpslld $31)
; for vblendvps; AVX512 uses vptestnmd into k1; XOP keeps the and + compare
; (vpcomneqd) before the blend.
547 define <4 x i32> @blend_splat1_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
548 ; AVX12-LABEL: blend_splat1_mask_cond_v4i32:
550 ; AVX12-NEXT: vpslld $31, %xmm0, %xmm0
551 ; AVX12-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
554 ; AVX512F-LABEL: blend_splat1_mask_cond_v4i32:
556 ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
557 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
558 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
559 ; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
560 ; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
561 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
562 ; AVX512F-NEXT: vzeroupper
565 ; AVX512VL-LABEL: blend_splat1_mask_cond_v4i32:
567 ; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
568 ; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
569 ; AVX512VL-NEXT: retq
571 ; XOP-LABEL: blend_splat1_mask_cond_v4i32:
573 ; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
574 ; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
575 ; XOP-NEXT: vpcomneqd %xmm3, %xmm0, %xmm0
576 ; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
578 %a = and <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
579 %c = icmp eq <4 x i32> %a, zeroinitializer
580 %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
; Bit-0 test on i16 lanes (no 16-bit blendv): the bit is shifted to the sign
; position and splatted with vpsllw $15 + vpsraw $15 before vpblendvb/vpcmov;
; AVX1/XOP work per xmm half. AVX512 keeps the and + cmpeq-zero form.
584 define <16 x i16> @blend_splat1_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
585 ; AVX1-LABEL: blend_splat1_mask_cond_v16i16:
587 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm3
588 ; AVX1-NEXT: vpsraw $15, %xmm3, %xmm3
589 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
590 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
591 ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
592 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
593 ; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm1
594 ; AVX1-NEXT: vandps %ymm0, %ymm2, %ymm0
595 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
598 ; AVX2-LABEL: blend_splat1_mask_cond_v16i16:
600 ; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
601 ; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
602 ; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
605 ; AVX512F-LABEL: blend_splat1_mask_cond_v16i16:
607 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
608 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
609 ; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
610 ; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
613 ; AVX512VL-LABEL: blend_splat1_mask_cond_v16i16:
615 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
616 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
617 ; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
618 ; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
619 ; AVX512VL-NEXT: retq
621 ; XOP-LABEL: blend_splat1_mask_cond_v16i16:
623 ; XOP-NEXT: vpsllw $15, %xmm0, %xmm3
624 ; XOP-NEXT: vpsraw $15, %xmm3, %xmm3
625 ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0
626 ; XOP-NEXT: vpsllw $15, %xmm0, %xmm0
627 ; XOP-NEXT: vpsraw $15, %xmm0, %xmm0
628 ; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
629 ; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0
631 %a = and <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
632 %c = icmp eq <16 x i16> %a, zeroinitializer
633 %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
; Bit-0 test on i8 lanes: AVX moves bit 0 to each byte's sign bit via
; vpsllw $7 (vpblendvb then reads the per-byte sign directly); AVX512 keeps
; and + vpcmpeqb; XOP uses and + vpcomneqb.
637 define <16 x i8> @blend_splat1_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
638 ; AVX12-LABEL: blend_splat1_mask_cond_v16i8:
640 ; AVX12-NEXT: vpsllw $7, %xmm0, %xmm0
641 ; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
644 ; AVX512F-LABEL: blend_splat1_mask_cond_v16i8:
646 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
647 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
648 ; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
649 ; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
652 ; AVX512VL-LABEL: blend_splat1_mask_cond_v16i8:
654 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
655 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
656 ; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
657 ; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0
658 ; AVX512VL-NEXT: retq
660 ; XOP-LABEL: blend_splat1_mask_cond_v16i8:
662 ; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
663 ; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
664 ; XOP-NEXT: vpcomneqb %xmm3, %xmm0, %xmm0
665 ; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
667 %a = and <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
668 %c = icmp eq <16 x i8> %a, zeroinitializer
669 %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
; The tested bit IS the sign bit (0x8000000000000000), so AVX needs no shift
; or compare at all: vblendvpd reads %xmm0 directly (operands swapped for the
; eq-zero inversion). AVX512 uses vptestnmq; XOP keeps and + vpcomneqq.
673 define <2 x i64> @blend_splatmax_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
674 ; AVX12-LABEL: blend_splatmax_mask_cond_v2i64:
676 ; AVX12-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
679 ; AVX512F-LABEL: blend_splatmax_mask_cond_v2i64:
681 ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
682 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
683 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
684 ; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
685 ; AVX512F-NEXT: vptestnmq %zmm3, %zmm0, %k1
686 ; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
687 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
688 ; AVX512F-NEXT: vzeroupper
691 ; AVX512VL-LABEL: blend_splatmax_mask_cond_v2i64:
693 ; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
694 ; AVX512VL-NEXT: vpblendmq %xmm1, %xmm2, %xmm0 {%k1}
695 ; AVX512VL-NEXT: retq
697 ; XOP-LABEL: blend_splatmax_mask_cond_v2i64:
699 ; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
700 ; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
701 ; XOP-NEXT: vpcomneqq %xmm3, %xmm0, %xmm0
702 ; XOP-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
704 %a = and <2 x i64> %x, <i64 9223372036854775808, i64 9223372036854775808>
705 %c = icmp eq <2 x i64> %a, zeroinitializer
706 %r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z
; Sign-bit (0x80000000) test on i32 lanes: AVX and XOP reduce to a single
; vblendvps on the unmodified input; AVX512 uses vptestnmd into k1.
710 define <8 x i32> @blend_splatmax_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
711 ; AVX12-LABEL: blend_splatmax_mask_cond_v8i32:
713 ; AVX12-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
716 ; AVX512F-LABEL: blend_splatmax_mask_cond_v8i32:
718 ; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
719 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
720 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
721 ; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
722 ; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
723 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
726 ; AVX512VL-LABEL: blend_splatmax_mask_cond_v8i32:
728 ; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
729 ; AVX512VL-NEXT: vpblendmd %ymm1, %ymm2, %ymm0 {%k1}
730 ; AVX512VL-NEXT: retq
732 ; XOP-LABEL: blend_splatmax_mask_cond_v8i32:
734 ; XOP-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
736 %a = and <8 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
737 %c = icmp eq <8 x i32> %a, zeroinitializer
738 %r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z
; Sign-bit (0x8000) test on i16 lanes: AVX splats the sign with vpsraw $15
; for vpblendvb (no 16-bit blendv); AVX512 keeps and + vpcmpeqw + blend/
; vpternlogq; XOP uses and + vpcomneqw.
742 define <8 x i16> @blend_splatmax_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
743 ; AVX12-LABEL: blend_splatmax_mask_cond_v8i16:
745 ; AVX12-NEXT: vpsraw $15, %xmm0, %xmm0
746 ; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
749 ; AVX512F-LABEL: blend_splatmax_mask_cond_v8i16:
751 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
752 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
753 ; AVX512F-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
754 ; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
757 ; AVX512VL-LABEL: blend_splatmax_mask_cond_v8i16:
759 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
760 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
761 ; AVX512VL-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
762 ; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0
763 ; AVX512VL-NEXT: retq
765 ; XOP-LABEL: blend_splatmax_mask_cond_v8i16:
767 ; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
768 ; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
769 ; XOP-NEXT: vpcomneqw %xmm3, %xmm0, %xmm0
770 ; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
772 %a = and <8 x i16> %x, <i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
773 %c = icmp eq <8 x i16> %a, zeroinitializer
774 %r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z
; Sign-bit (0x80) test on i8 lanes at 256 bits: AVX2 uses one ymm vpblendvb
; on the unmodified input; AVX1/XOP fall back to per-half sign compares
; (no 256-bit integer blendv on AVX1); AVX512 keeps and + vpcmpeqb.
778 define <32 x i8> @blend_splatmax_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z) {
779 ; AVX1-LABEL: blend_splatmax_mask_cond_v32i8:
781 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
782 ; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
783 ; AVX1-NEXT: vpcmpgtb %xmm3, %xmm4, %xmm3
784 ; AVX1-NEXT: vpcmpgtb %xmm0, %xmm4, %xmm0
785 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
786 ; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm1
787 ; AVX1-NEXT: vandps %ymm0, %ymm2, %ymm0
788 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
791 ; AVX2-LABEL: blend_splatmax_mask_cond_v32i8:
793 ; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
796 ; AVX512F-LABEL: blend_splatmax_mask_cond_v32i8:
798 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
799 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
800 ; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
801 ; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
804 ; AVX512VL-LABEL: blend_splatmax_mask_cond_v32i8:
806 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
807 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
808 ; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
809 ; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
810 ; AVX512VL-NEXT: retq
812 ; XOP-LABEL: blend_splatmax_mask_cond_v32i8:
814 ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
815 ; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
816 ; XOP-NEXT: vpcmpgtb %xmm3, %xmm4, %xmm3
817 ; XOP-NEXT: vpcmpgtb %xmm0, %xmm4, %xmm0
818 ; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
819 ; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0
821 %a = and <32 x i8> %x, <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>
822 %c = icmp eq <32 x i8> %a, zeroinitializer
823 %r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z
; Non-sign single-bit splat mask (bit 1, i.e. splat 2): same lowering shape
; as the bit-0 case but shifting by 62 to reach the sign position; AVX512
; still tests the bit directly with vptestnmq.
827 define <4 x i64> @blend_splat_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
828 ; AVX1-LABEL: blend_splat_mask_cond_v4i64:
830 ; AVX1-NEXT: vpsllq $62, %xmm0, %xmm3
831 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
832 ; AVX1-NEXT: vpsllq $62, %xmm0, %xmm0
833 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
834 ; AVX1-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
837 ; AVX2-LABEL: blend_splat_mask_cond_v4i64:
839 ; AVX2-NEXT: vpsllq $62, %ymm0, %ymm0
840 ; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
843 ; AVX512F-LABEL: blend_splat_mask_cond_v4i64:
845 ; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
846 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
847 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
848 ; AVX512F-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
849 ; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
850 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
853 ; AVX512VL-LABEL: blend_splat_mask_cond_v4i64:
855 ; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
856 ; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
857 ; AVX512VL-NEXT: retq
859 ; XOP-LABEL: blend_splat_mask_cond_v4i64:
861 ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
862 ; XOP-NEXT: vpsllq $62, %xmm3, %xmm3
863 ; XOP-NEXT: vpmovsxbq {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
864 ; XOP-NEXT: vpshaq %xmm4, %xmm3, %xmm3
865 ; XOP-NEXT: vpsllq $62, %xmm0, %xmm0
866 ; XOP-NEXT: vpshaq %xmm4, %xmm0, %xmm0
867 ; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
868 ; XOP-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
870 %a = and <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2>
871 %c = icmp eq <4 x i64> %a, zeroinitializer
872 %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
; Single-bit splat mask (bit 16, splat 65536) on i32: AVX shifts it to the
; sign bit with vpslld $15 for vblendvps; AVX512 tests it with vptestnmd;
; XOP keeps and + vpcomneqd.
876 define <4 x i32> @blend_splat_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
877 ; AVX12-LABEL: blend_splat_mask_cond_v4i32:
879 ; AVX12-NEXT: vpslld $15, %xmm0, %xmm0
880 ; AVX12-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
883 ; AVX512F-LABEL: blend_splat_mask_cond_v4i32:
885 ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
886 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
887 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
888 ; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
889 ; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
890 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
891 ; AVX512F-NEXT: vzeroupper
894 ; AVX512VL-LABEL: blend_splat_mask_cond_v4i32:
896 ; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
897 ; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
898 ; AVX512VL-NEXT: retq
900 ; XOP-LABEL: blend_splat_mask_cond_v4i32:
902 ; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
903 ; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
904 ; XOP-NEXT: vpcomneqd %xmm3, %xmm0, %xmm0
905 ; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
907 %a = and <4 x i32> %x, <i32 65536, i32 65536, i32 65536, i32 65536>
908 %c = icmp eq <4 x i32> %a, zeroinitializer
909 %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
; Single-bit splat mask (bit 10, splat 1024) on i16: shift to sign (vpsllw
; $5) and splat (vpsraw $15) before vpblendvb/vpcmov; AVX1/XOP per half;
; AVX512 keeps and + vpcmpeqw.
913 define <16 x i16> @blend_splat_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
914 ; AVX1-LABEL: blend_splat_mask_cond_v16i16:
916 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm3
917 ; AVX1-NEXT: vpsraw $15, %xmm3, %xmm3
918 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
919 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
920 ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
921 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
922 ; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm1
923 ; AVX1-NEXT: vandps %ymm0, %ymm2, %ymm0
924 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
927 ; AVX2-LABEL: blend_splat_mask_cond_v16i16:
929 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
930 ; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
931 ; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
934 ; AVX512F-LABEL: blend_splat_mask_cond_v16i16:
936 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
937 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
938 ; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
939 ; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
942 ; AVX512VL-LABEL: blend_splat_mask_cond_v16i16:
944 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
945 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
946 ; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
947 ; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
948 ; AVX512VL-NEXT: retq
950 ; XOP-LABEL: blend_splat_mask_cond_v16i16:
952 ; XOP-NEXT: vpsllw $5, %xmm0, %xmm3
953 ; XOP-NEXT: vpsraw $15, %xmm3, %xmm3
954 ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0
955 ; XOP-NEXT: vpsllw $5, %xmm0, %xmm0
956 ; XOP-NEXT: vpsraw $15, %xmm0, %xmm0
957 ; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
958 ; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0
960 %a = and <16 x i16> %x, <i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024>
961 %c = icmp eq <16 x i16> %a, zeroinitializer
962 %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
966 define <16 x i8> @blend_splat_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
967 ; AVX12-LABEL: blend_splat_mask_cond_v16i8:
969 ; AVX12-NEXT: vpsllw $5, %xmm0, %xmm0
970 ; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
973 ; AVX512F-LABEL: blend_splat_mask_cond_v16i8:
975 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
976 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
977 ; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
978 ; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
981 ; AVX512VL-LABEL: blend_splat_mask_cond_v16i8:
983 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
984 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
985 ; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
986 ; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0
987 ; AVX512VL-NEXT: retq
989 ; XOP-LABEL: blend_splat_mask_cond_v16i8:
991 ; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
992 ; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
993 ; XOP-NEXT: vpcomneqb %xmm3, %xmm0, %xmm0
994 ; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
996 %a = and <16 x i8> %x, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
997 %c = icmp eq <16 x i8> %a, zeroinitializer
998 %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
1002 define <2 x i64> @blend_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
1003 ; AVX1-LABEL: blend_mask_cond_v2i64:
1005 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1006 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1007 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
1008 ; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
1011 ; AVX2-LABEL: blend_mask_cond_v2i64:
1013 ; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1014 ; AVX2-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
1017 ; AVX512F-LABEL: blend_mask_cond_v2i64:
1019 ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
1020 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1021 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1022 ; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [1,4]
1023 ; AVX512F-NEXT: vptestnmq %zmm3, %zmm0, %k1
1024 ; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
1025 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1026 ; AVX512F-NEXT: vzeroupper
1027 ; AVX512F-NEXT: retq
1029 ; AVX512VL-LABEL: blend_mask_cond_v2i64:
1030 ; AVX512VL: # %bb.0:
1031 ; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
1032 ; AVX512VL-NEXT: vpblendmq %xmm1, %xmm2, %xmm0 {%k1}
1033 ; AVX512VL-NEXT: retq
1035 ; XOP-LABEL: blend_mask_cond_v2i64:
1037 ; XOP-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1038 ; XOP-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
1040 %a = and <2 x i64> %x, <i64 1, i64 4>
1041 %c = icmp eq <2 x i64> %a, zeroinitializer
1042 %r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z
1046 define <4 x i32> @blend_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
1047 ; AVX1-LABEL: blend_mask_cond_v4i32:
1049 ; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1050 ; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
1053 ; AVX2-LABEL: blend_mask_cond_v4i32:
1055 ; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1056 ; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
1059 ; AVX512F-LABEL: blend_mask_cond_v4i32:
1061 ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
1062 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1063 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1064 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [65536,512,2,1]
1065 ; AVX512F-NEXT: vptestnmd %zmm3, %zmm0, %k1
1066 ; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
1067 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1068 ; AVX512F-NEXT: vzeroupper
1069 ; AVX512F-NEXT: retq
1071 ; AVX512VL-LABEL: blend_mask_cond_v4i32:
1072 ; AVX512VL: # %bb.0:
1073 ; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
1074 ; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
1075 ; AVX512VL-NEXT: retq
1077 ; XOP-LABEL: blend_mask_cond_v4i32:
1079 ; XOP-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1080 ; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
1082 %a = and <4 x i32> %x, <i32 65536, i32 512, i32 2, i32 1>
1083 %c = icmp eq <4 x i32> %a, zeroinitializer
1084 %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
1088 define <8 x i16> @blend_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
1089 ; AVX12-LABEL: blend_mask_cond_v8i16:
1091 ; AVX12-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1092 ; AVX12-NEXT: vpxor %xmm3, %xmm3, %xmm3
1093 ; AVX12-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
1094 ; AVX12-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
1097 ; AVX512F-LABEL: blend_mask_cond_v8i16:
1099 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1100 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
1101 ; AVX512F-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
1102 ; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
1103 ; AVX512F-NEXT: retq
1105 ; AVX512VL-LABEL: blend_mask_cond_v8i16:
1106 ; AVX512VL: # %bb.0:
1107 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1108 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
1109 ; AVX512VL-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
1110 ; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0
1111 ; AVX512VL-NEXT: retq
1113 ; XOP-LABEL: blend_mask_cond_v8i16:
1115 ; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
1116 ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1117 ; XOP-NEXT: vpcomltw %xmm3, %xmm0, %xmm0
1118 ; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
1120 %a = and <8 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 1024, i16 2, i16 4096>
1121 %c = icmp eq <8 x i16> %a, zeroinitializer
1122 %r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z
1126 define <16 x i8> @blend_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
1127 ; AVX12-LABEL: blend_mask_cond_v16i8:
1129 ; AVX12-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1130 ; AVX12-NEXT: vpxor %xmm3, %xmm3, %xmm3
1131 ; AVX12-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
1132 ; AVX12-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
1135 ; AVX512F-LABEL: blend_mask_cond_v16i8:
1137 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1138 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
1139 ; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
1140 ; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
1141 ; AVX512F-NEXT: retq
1143 ; AVX512VL-LABEL: blend_mask_cond_v16i8:
1144 ; AVX512VL: # %bb.0:
1145 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1146 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
1147 ; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
1148 ; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0
1149 ; AVX512VL-NEXT: retq
1151 ; XOP-LABEL: blend_mask_cond_v16i8:
1153 ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1154 ; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
1156 %a = and <16 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2>
1157 %c = icmp eq <16 x i8> %a, zeroinitializer
1158 %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
1162 define <4 x i64> @blend_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
1163 ; AVX1-LABEL: blend_mask_cond_v4i64:
1165 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1166 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1167 ; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
1168 ; AVX1-NEXT: vpcmpeqq %xmm4, %xmm3, %xmm3
1169 ; AVX1-NEXT: vpcmpeqq %xmm4, %xmm0, %xmm0
1170 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
1171 ; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
1174 ; AVX2-LABEL: blend_mask_cond_v4i64:
1176 ; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1177 ; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
1180 ; AVX512F-LABEL: blend_mask_cond_v4i64:
1182 ; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
1183 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1184 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1185 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} ymm3 = [2,4,32768,1]
1186 ; AVX512F-NEXT: vptestnmq %zmm3, %zmm0, %k1
1187 ; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
1188 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1189 ; AVX512F-NEXT: retq
1191 ; AVX512VL-LABEL: blend_mask_cond_v4i64:
1192 ; AVX512VL: # %bb.0:
1193 ; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1
1194 ; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
1195 ; AVX512VL-NEXT: retq
1197 ; XOP-LABEL: blend_mask_cond_v4i64:
1199 ; XOP-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
1200 ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0
1201 ; XOP-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1202 ; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
1203 ; XOP-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
1205 %a = and <4 x i64> %x, <i64 2, i64 4, i64 32768, i64 1>
1206 %c = icmp eq <4 x i64> %a, zeroinitializer
1207 %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
1211 define <8 x i32> @blend_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
1212 ; AVX1-LABEL: blend_mask_cond_v8i32:
1214 ; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
1215 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1216 ; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1217 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
1218 ; AVX1-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
1221 ; AVX2-LABEL: blend_mask_cond_v8i32:
1223 ; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1224 ; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
1227 ; AVX512F-LABEL: blend_mask_cond_v8i32:
1229 ; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
1230 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1231 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1232 ; AVX512F-NEXT: vpmovsxwd {{.*#+}} ymm3 = [1,2,8,4,8,1024,2,4096]
1233 ; AVX512F-NEXT: vptestnmd %zmm3, %zmm0, %k1
1234 ; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
1235 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1236 ; AVX512F-NEXT: retq
1238 ; AVX512VL-LABEL: blend_mask_cond_v8i32:
1239 ; AVX512VL: # %bb.0:
1240 ; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1
1241 ; AVX512VL-NEXT: vpblendmd %ymm1, %ymm2, %ymm0 {%k1}
1242 ; AVX512VL-NEXT: retq
1244 ; XOP-LABEL: blend_mask_cond_v8i32:
1246 ; XOP-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
1247 ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0
1248 ; XOP-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1249 ; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
1250 ; XOP-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
1252 %a = and <8 x i32> %x, <i32 1, i32 2, i32 8, i32 4, i32 8, i32 1024, i32 2, i32 4096>
1253 %c = icmp eq <8 x i32> %a, zeroinitializer
1254 %r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z
1258 define <16 x i16> @blend_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
1259 ; AVX1-LABEL: blend_mask_cond_v16i16:
1261 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1262 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1263 ; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
1264 ; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
1265 ; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
1266 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
1267 ; AVX1-NEXT: vandnps %ymm2, %ymm0, %ymm2
1268 ; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
1269 ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
1272 ; AVX2-LABEL: blend_mask_cond_v16i16:
1274 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1275 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
1276 ; AVX2-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
1277 ; AVX2-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
1280 ; AVX512F-LABEL: blend_mask_cond_v16i16:
1282 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1283 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
1284 ; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
1285 ; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
1286 ; AVX512F-NEXT: retq
1288 ; AVX512VL-LABEL: blend_mask_cond_v16i16:
1289 ; AVX512VL: # %bb.0:
1290 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1291 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
1292 ; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
1293 ; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
1294 ; AVX512VL-NEXT: retq
1296 ; XOP-LABEL: blend_mask_cond_v16i16:
1298 ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
1299 ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
1300 ; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
1301 ; XOP-NEXT: vpcomltw %xmm4, %xmm3, %xmm3
1302 ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1303 ; XOP-NEXT: vpcomltw %xmm4, %xmm0, %xmm0
1304 ; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
1305 ; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0
1307 %a = and <16 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 2, i16 2, i16 2, i16 2, i16 8, i16 8, i16 64, i16 64, i16 1024, i16 4096, i16 1024>
1308 %c = icmp eq <16 x i16> %a, zeroinitializer
1309 %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
1313 define <32 x i8> @blend_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z) {
1314 ; AVX1-LABEL: blend_mask_cond_v32i8:
1316 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1317 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1318 ; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
1319 ; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3
1320 ; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
1321 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
1322 ; AVX1-NEXT: vandnps %ymm2, %ymm0, %ymm2
1323 ; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
1324 ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
1327 ; AVX2-LABEL: blend_mask_cond_v32i8:
1329 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1330 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
1331 ; AVX2-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
1332 ; AVX2-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
1335 ; AVX512F-LABEL: blend_mask_cond_v32i8:
1337 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1338 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
1339 ; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
1340 ; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
1341 ; AVX512F-NEXT: retq
1343 ; AVX512VL-LABEL: blend_mask_cond_v32i8:
1344 ; AVX512VL: # %bb.0:
1345 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1346 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
1347 ; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
1348 ; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
1349 ; AVX512VL-NEXT: retq
1351 ; XOP-LABEL: blend_mask_cond_v32i8:
1353 ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
1354 ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
1355 ; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
1356 ; XOP-NEXT: vpcomltb %xmm4, %xmm3, %xmm3
1357 ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1358 ; XOP-NEXT: vpcomltb %xmm4, %xmm0, %xmm0
1359 ; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
1360 ; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0
1362 %a = and <32 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 128, i8 4, i8 2, i8 16>
1363 %c = icmp eq <32 x i8> %a, zeroinitializer
1364 %r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z
1368 define void @store_blend_load_v4i64(ptr %a0, ptr %a1, ptr %a2) {
1369 ; AVX1-LABEL: store_blend_load_v4i64:
1371 ; AVX1-NEXT: vmovapd (%rsi), %ymm0
1372 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
1373 ; AVX1-NEXT: # xmm1 = mem[0,0]
1374 ; AVX1-NEXT: vpxor 16(%rdi), %xmm1, %xmm2
1375 ; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775815,9223372036854775815]
1376 ; AVX1-NEXT: # xmm3 = mem[0,0]
1377 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
1378 ; AVX1-NEXT: vpxor (%rdi), %xmm1, %xmm1
1379 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
1380 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1381 ; AVX1-NEXT: vblendvpd %ymm1, (%rdi), %ymm0, %ymm0
1382 ; AVX1-NEXT: vmovapd %ymm0, (%rdx)
1383 ; AVX1-NEXT: vzeroupper
1386 ; AVX2-LABEL: store_blend_load_v4i64:
1388 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
1389 ; AVX2-NEXT: vmovapd (%rsi), %ymm1
1390 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
1391 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
1392 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775815,9223372036854775815,9223372036854775815,9223372036854775815]
1393 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
1394 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1395 ; AVX2-NEXT: vmovapd %ymm0, (%rdx)
1396 ; AVX2-NEXT: vzeroupper
1399 ; AVX512F-LABEL: store_blend_load_v4i64:
1401 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
1402 ; AVX512F-NEXT: vmovdqa (%rsi), %ymm1
1403 ; AVX512F-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
1404 ; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
1405 ; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
1406 ; AVX512F-NEXT: vzeroupper
1407 ; AVX512F-NEXT: retq
1409 ; AVX512VL-LABEL: store_blend_load_v4i64:
1410 ; AVX512VL: # %bb.0:
1411 ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
1412 ; AVX512VL-NEXT: vmovdqa (%rsi), %ymm1
1413 ; AVX512VL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
1414 ; AVX512VL-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
1415 ; AVX512VL-NEXT: vmovdqa %ymm1, (%rdx)
1416 ; AVX512VL-NEXT: vzeroupper
1417 ; AVX512VL-NEXT: retq
1419 ; XOP-LABEL: store_blend_load_v4i64:
1421 ; XOP-NEXT: vmovapd (%rsi), %ymm0
1422 ; XOP-NEXT: vpmovsxbq {{.*#+}} xmm1 = [7,7]
1423 ; XOP-NEXT: vpcomltuq 16(%rdi), %xmm1, %xmm2
1424 ; XOP-NEXT: vpcomltuq (%rdi), %xmm1, %xmm1
1425 ; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1426 ; XOP-NEXT: vblendvpd %ymm1, (%rdi), %ymm0, %ymm0
1427 ; XOP-NEXT: vmovapd %ymm0, (%rdx)
1428 ; XOP-NEXT: vzeroupper
1430 %v0 = load <4 x i64>, ptr %a0
1431 %v1 = load <4 x i64>, ptr %a1
1432 %cmp = icmp ugt <4 x i64> %v0, <i64 7, i64 7, i64 7, i64 7>
1433 %res = select <4 x i1> %cmp, <4 x i64> %v0, <4 x i64> %v1
1434 store <4 x i64> %res, ptr %a2
1438 define void @store_blend_load_v8i32(ptr %a0, ptr %a1, ptr %a2) {
1439 ; AVX1-LABEL: store_blend_load_v8i32:
1441 ; AVX1-NEXT: vmovaps (%rsi), %ymm0
1442 ; AVX1-NEXT: vmovdqa (%rdi), %xmm1
1443 ; AVX1-NEXT: vmovdqa 16(%rdi), %xmm2
1444 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [8,8,8,8]
1445 ; AVX1-NEXT: vpmaxud %xmm3, %xmm2, %xmm4
1446 ; AVX1-NEXT: vpcmpeqd %xmm4, %xmm2, %xmm2
1447 ; AVX1-NEXT: vpmaxud %xmm3, %xmm1, %xmm3
1448 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
1449 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1450 ; AVX1-NEXT: vblendvps %ymm1, (%rdi), %ymm0, %ymm0
1451 ; AVX1-NEXT: vmovaps %ymm0, (%rdx)
1452 ; AVX1-NEXT: vzeroupper
1455 ; AVX2-LABEL: store_blend_load_v8i32:
1457 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
1458 ; AVX2-NEXT: vmovaps (%rsi), %ymm1
1459 ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8,8,8,8,8,8,8,8]
1460 ; AVX2-NEXT: vpmaxud %ymm2, %ymm0, %ymm2
1461 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm2
1462 ; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
1463 ; AVX2-NEXT: vmovaps %ymm0, (%rdx)
1464 ; AVX2-NEXT: vzeroupper
1467 ; AVX512F-LABEL: store_blend_load_v8i32:
1469 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
1470 ; AVX512F-NEXT: vmovdqa (%rsi), %ymm1
1471 ; AVX512F-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
1472 ; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
1473 ; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
1474 ; AVX512F-NEXT: vzeroupper
1475 ; AVX512F-NEXT: retq
1477 ; AVX512VL-LABEL: store_blend_load_v8i32:
1478 ; AVX512VL: # %bb.0:
1479 ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
1480 ; AVX512VL-NEXT: vmovdqa (%rsi), %ymm1
1481 ; AVX512VL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
1482 ; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
1483 ; AVX512VL-NEXT: vmovdqa %ymm1, (%rdx)
1484 ; AVX512VL-NEXT: vzeroupper
1485 ; AVX512VL-NEXT: retq
1487 ; XOP-LABEL: store_blend_load_v8i32:
1489 ; XOP-NEXT: vmovaps (%rsi), %ymm0
1490 ; XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
1491 ; XOP-NEXT: vpcomltud 16(%rdi), %xmm1, %xmm2
1492 ; XOP-NEXT: vpcomltud (%rdi), %xmm1, %xmm1
1493 ; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1494 ; XOP-NEXT: vblendvps %ymm1, (%rdi), %ymm0, %ymm0
1495 ; XOP-NEXT: vmovaps %ymm0, (%rdx)
1496 ; XOP-NEXT: vzeroupper
1498 %v0 = load <8 x i32>, ptr %a0
1499 %v1 = load <8 x i32>, ptr %a1
1500 %cmp = icmp ugt <8 x i32> %v0, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
1501 %res = select <8 x i1> %cmp, <8 x i32> %v0, <8 x i32> %v1
1502 store <8 x i32> %res, ptr %a2
1506 define void @store_blend_load_v16i16(ptr %a0, ptr %a1, ptr %a2) {
1507 ; AVX1-LABEL: store_blend_load_v16i16:
1509 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
1510 ; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
1511 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8]
1512 ; AVX1-NEXT: vpmaxuw %xmm2, %xmm0, %xmm3
1513 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm3
1514 ; AVX1-NEXT: vpmaxuw %xmm2, %xmm1, %xmm2
1515 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2
1516 ; AVX1-NEXT: vmovdqa (%rsi), %xmm4
1517 ; AVX1-NEXT: vmovdqa 16(%rsi), %xmm5
1518 ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm5, %xmm1
1519 ; AVX1-NEXT: vpblendvb %xmm3, %xmm0, %xmm4, %xmm0
1520 ; AVX1-NEXT: vmovdqa %xmm0, (%rdx)
1521 ; AVX1-NEXT: vmovdqa %xmm1, 16(%rdx)
1524 ; AVX2-LABEL: store_blend_load_v16i16:
1526 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
1527 ; AVX2-NEXT: vmovdqa (%rsi), %ymm1
1528 ; AVX2-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
1529 ; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
1530 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
1531 ; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
1532 ; AVX2-NEXT: vzeroupper
1535 ; AVX512F-LABEL: store_blend_load_v16i16:
1537 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
1538 ; AVX512F-NEXT: vmovdqa (%rsi), %ymm1
1539 ; AVX512F-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
1540 ; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
1541 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
1542 ; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
1543 ; AVX512F-NEXT: vzeroupper
1544 ; AVX512F-NEXT: retq
1546 ; AVX512VL-LABEL: store_blend_load_v16i16:
1547 ; AVX512VL: # %bb.0:
1548 ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
1549 ; AVX512VL-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
1550 ; AVX512VL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1
1551 ; AVX512VL-NEXT: vpternlogq $202, (%rsi), %ymm0, %ymm1
1552 ; AVX512VL-NEXT: vmovdqa %ymm1, (%rdx)
1553 ; AVX512VL-NEXT: vzeroupper
1554 ; AVX512VL-NEXT: retq
1556 ; XOP-LABEL: store_blend_load_v16i16:
1558 ; XOP-NEXT: vmovdqa (%rdi), %ymm0
1559 ; XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
1560 ; XOP-NEXT: vpcomltuw 16(%rdi), %xmm1, %xmm2
1561 ; XOP-NEXT: vpcomltuw (%rdi), %xmm1, %xmm1
1562 ; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1563 ; XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0
1564 ; XOP-NEXT: vmovdqa %ymm0, (%rdx)
1565 ; XOP-NEXT: vzeroupper
1567 %v0 = load <16 x i16>, ptr %a0
1568 %v1 = load <16 x i16>, ptr %a1
1569 %cmp = icmp ugt <16 x i16> %v0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
1570 %res = select <16 x i1> %cmp, <16 x i16> %v0, <16 x i16> %v1
1571 store <16 x i16> %res, ptr %a2
1575 define void @store_blend_load_v32i8(ptr %a0, ptr %a1, ptr %a2) {
1576 ; AVX1-LABEL: store_blend_load_v32i8:
1578 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
1579 ; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
1580 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
1581 ; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm3
1582 ; AVX1-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm3
1583 ; AVX1-NEXT: vpmaxub %xmm2, %xmm1, %xmm2
1584 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
1585 ; AVX1-NEXT: vmovdqa (%rsi), %xmm4
1586 ; AVX1-NEXT: vmovdqa 16(%rsi), %xmm5
1587 ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm5, %xmm1
1588 ; AVX1-NEXT: vpblendvb %xmm3, %xmm0, %xmm4, %xmm0
1589 ; AVX1-NEXT: vmovdqa %xmm0, (%rdx)
1590 ; AVX1-NEXT: vmovdqa %xmm1, 16(%rdx)
1593 ; AVX2-LABEL: store_blend_load_v32i8:
1595 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
1596 ; AVX2-NEXT: vmovdqa (%rsi), %ymm1
1597 ; AVX2-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
1598 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
1599 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
1600 ; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
1601 ; AVX2-NEXT: vzeroupper
1604 ; AVX512F-LABEL: store_blend_load_v32i8:
1606 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
1607 ; AVX512F-NEXT: vmovdqa (%rsi), %ymm1
1608 ; AVX512F-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
1609 ; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
1610 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
1611 ; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
1612 ; AVX512F-NEXT: vzeroupper
1613 ; AVX512F-NEXT: retq
1615 ; AVX512VL-LABEL: store_blend_load_v32i8:
1616 ; AVX512VL: # %bb.0:
1617 ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
1618 ; AVX512VL-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
1619 ; AVX512VL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm1
1620 ; AVX512VL-NEXT: vpternlogq $202, (%rsi), %ymm0, %ymm1
1621 ; AVX512VL-NEXT: vmovdqa %ymm1, (%rdx)
1622 ; AVX512VL-NEXT: vzeroupper
1623 ; AVX512VL-NEXT: retq
1625 ; XOP-LABEL: store_blend_load_v32i8:
1627 ; XOP-NEXT: vmovdqa (%rdi), %ymm0
1628 ; XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1629 ; XOP-NEXT: vpcomltub 16(%rdi), %xmm1, %xmm2
1630 ; XOP-NEXT: vpcomltub (%rdi), %xmm1, %xmm1
1631 ; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1632 ; XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0
1633 ; XOP-NEXT: vmovdqa %ymm0, (%rdx)
1634 ; XOP-NEXT: vzeroupper
1636 %v0 = load <32 x i8>, ptr %a0
1637 %v1 = load <32 x i8>, ptr %a1
1638 %cmp = icmp ugt <32 x i8> %v0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
1639 %res = select <32 x i1> %cmp, <32 x i8> %v0, <32 x i8> %v1
1640 store <32 x i8> %res, ptr %a2
1644 define void @PR46531(ptr %x, ptr %y, ptr %z) {
1645 ; AVX12-LABEL: PR46531:
1647 ; AVX12-NEXT: vmovdqu (%rsi), %xmm0
1648 ; AVX12-NEXT: vmovdqu (%rdx), %xmm1
1649 ; AVX12-NEXT: vpor %xmm0, %xmm1, %xmm2
1650 ; AVX12-NEXT: vpxor %xmm0, %xmm1, %xmm0
1651 ; AVX12-NEXT: vpslld $31, %xmm1, %xmm1
1652 ; AVX12-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
1653 ; AVX12-NEXT: vmovups %xmm0, (%rdi)
1656 ; AVX512F-LABEL: PR46531:
1658 ; AVX512F-NEXT: vmovdqu (%rsi), %xmm0
1659 ; AVX512F-NEXT: vmovdqu (%rdx), %xmm1
1660 ; AVX512F-NEXT: vpor %xmm0, %xmm1, %xmm2
1661 ; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %k1
1662 ; AVX512F-NEXT: vpxor %xmm0, %xmm1, %xmm0
1663 ; AVX512F-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1}
1664 ; AVX512F-NEXT: vmovdqu %xmm0, (%rdi)
1665 ; AVX512F-NEXT: vzeroupper
1666 ; AVX512F-NEXT: retq
1668 ; AVX512VL-LABEL: PR46531:
1669 ; AVX512VL: # %bb.0:
1670 ; AVX512VL-NEXT: vmovdqu (%rsi), %xmm0
1671 ; AVX512VL-NEXT: vmovdqu (%rdx), %xmm1
1672 ; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %k1
1673 ; AVX512VL-NEXT: vpxor %xmm0, %xmm1, %xmm2
1674 ; AVX512VL-NEXT: vpord %xmm0, %xmm1, %xmm2 {%k1}
1675 ; AVX512VL-NEXT: vmovdqu %xmm2, (%rdi)
1676 ; AVX512VL-NEXT: retq
1678 ; XOP-LABEL: PR46531:
1680 ; XOP-NEXT: vmovdqu (%rsi), %xmm0
1681 ; XOP-NEXT: vmovdqu (%rdx), %xmm1
1682 ; XOP-NEXT: vpor %xmm0, %xmm1, %xmm2
1683 ; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
1684 ; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
1685 ; XOP-NEXT: vpcomneqd %xmm4, %xmm3, %xmm3
1686 ; XOP-NEXT: vpxor %xmm0, %xmm1, %xmm0
1687 ; XOP-NEXT: vblendvps %xmm3, %xmm0, %xmm2, %xmm0
1688 ; XOP-NEXT: vmovups %xmm0, (%rdi)
1690 %a = load <4 x i32>, ptr %y, align 4
1691 %b = load <4 x i32>, ptr %z, align 4
1692 %or = or <4 x i32> %b, %a
1693 %and = and <4 x i32> %b, <i32 1, i32 1, i32 1, i32 1>
1694 %cmp = icmp eq <4 x i32> %and, zeroinitializer
1695 %xor = xor <4 x i32> %b, %a
1696 %sel = select <4 x i1> %cmp, <4 x i32> %or, <4 x i32> %xor
1697 store <4 x i32> %sel, ptr %x, align 4
1701 attributes #0 = { "no-nans-fp-math"="true" }
1702 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: