1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSE2 | FileCheck %s --check-prefix=SSE2-SSSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSSE3 | FileCheck %s --check-prefix=SSE2-SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512F
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefix=AVX512BW
; Test v4i64 - two signed greater-than compares on <4 x i64> operands are ANDed
; as <4 x i1> and the result is bitcast to a scalar i4 bitmask.
; The assertion lines below are autogenerated, one group per RUN configuration.
9 define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
10 ; SSE2-SSSE3-LABEL: v4i64:
11 ; SSE2-SSSE3: # %bb.0:
12 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,2147483648]
13 ; SSE2-SSSE3-NEXT: pxor %xmm9, %xmm3
14 ; SSE2-SSSE3-NEXT: pxor %xmm9, %xmm1
15 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm10
16 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm10
17 ; SSE2-SSSE3-NEXT: pxor %xmm9, %xmm2
18 ; SSE2-SSSE3-NEXT: pxor %xmm9, %xmm0
19 ; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm8
20 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm8
21 ; SSE2-SSSE3-NEXT: movdqa %xmm8, %xmm11
22 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm11 = xmm11[0,2],xmm10[0,2]
23 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm1
24 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
25 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
26 ; SSE2-SSSE3-NEXT: andps %xmm11, %xmm0
27 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm8 = xmm8[1,3],xmm10[1,3]
28 ; SSE2-SSSE3-NEXT: orps %xmm0, %xmm8
29 ; SSE2-SSSE3-NEXT: pxor %xmm9, %xmm7
30 ; SSE2-SSSE3-NEXT: pxor %xmm9, %xmm5
31 ; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm0
32 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm0
33 ; SSE2-SSSE3-NEXT: pxor %xmm9, %xmm6
34 ; SSE2-SSSE3-NEXT: pxor %xmm9, %xmm4
35 ; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
36 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm1
37 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm2
38 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[0,2]
39 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm7, %xmm5
40 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm4
41 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3]
42 ; SSE2-SSSE3-NEXT: andps %xmm2, %xmm4
43 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm0[1,3]
44 ; SSE2-SSSE3-NEXT: orps %xmm4, %xmm1
45 ; SSE2-SSSE3-NEXT: andps %xmm8, %xmm1
46 ; SSE2-SSSE3-NEXT: movmskps %xmm1, %eax
47 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
48 ; SSE2-SSSE3-NEXT: retq
52 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
53 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
54 ; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
55 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
56 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
57 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
58 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
59 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
60 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
61 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
62 ; AVX1-NEXT: vandpd %ymm2, %ymm0, %ymm0
63 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
64 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
65 ; AVX1-NEXT: vzeroupper
70 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
71 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
72 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
73 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
74 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
75 ; AVX2-NEXT: vzeroupper
78 ; AVX512F-LABEL: v4i64:
80 ; AVX512F-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
81 ; AVX512F-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
82 ; AVX512F-NEXT: kmovw %k0, %eax
83 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
84 ; AVX512F-NEXT: vzeroupper
87 ; AVX512BW-LABEL: v4i64:
89 ; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
90 ; AVX512BW-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
91 ; AVX512BW-NEXT: kmovd %k0, %eax
92 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
93 ; AVX512BW-NEXT: vzeroupper
95 %x0 = icmp sgt <4 x i64> %a, %b
96 %x1 = icmp sgt <4 x i64> %c, %d
97 %y = and <4 x i1> %x0, %x1
98 %res = bitcast <4 x i1> %y to i4
; Test v4f64 - two ordered greater-than (fcmp ogt) compares on <4 x double>
; operands are ANDed as <4 x i1> and the result is bitcast to an i4 bitmask.
102 define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
103 ; SSE2-SSSE3-LABEL: v4f64:
104 ; SSE2-SSSE3: # %bb.0:
105 ; SSE2-SSSE3-NEXT: cmpltpd %xmm1, %xmm3
106 ; SSE2-SSSE3-NEXT: cmpltpd %xmm0, %xmm2
107 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
108 ; SSE2-SSSE3-NEXT: cmpltpd %xmm5, %xmm7
109 ; SSE2-SSSE3-NEXT: cmpltpd %xmm4, %xmm6
110 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
111 ; SSE2-SSSE3-NEXT: andps %xmm2, %xmm6
112 ; SSE2-SSSE3-NEXT: movmskps %xmm6, %eax
113 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
114 ; SSE2-SSSE3-NEXT: retq
116 ; AVX12-LABEL: v4f64:
118 ; AVX12-NEXT: vcmpltpd %ymm2, %ymm3, %ymm2
119 ; AVX12-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
120 ; AVX12-NEXT: vandpd %ymm2, %ymm0, %ymm0
121 ; AVX12-NEXT: vmovmskpd %ymm0, %eax
122 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
123 ; AVX12-NEXT: vzeroupper
126 ; AVX512F-LABEL: v4f64:
128 ; AVX512F-NEXT: vcmpltpd %ymm0, %ymm1, %k1
129 ; AVX512F-NEXT: vcmpltpd %ymm2, %ymm3, %k0 {%k1}
130 ; AVX512F-NEXT: kmovw %k0, %eax
131 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
132 ; AVX512F-NEXT: vzeroupper
135 ; AVX512BW-LABEL: v4f64:
137 ; AVX512BW-NEXT: vcmpltpd %ymm0, %ymm1, %k1
138 ; AVX512BW-NEXT: vcmpltpd %ymm2, %ymm3, %k0 {%k1}
139 ; AVX512BW-NEXT: kmovd %k0, %eax
140 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
141 ; AVX512BW-NEXT: vzeroupper
142 ; AVX512BW-NEXT: retq
143 %x0 = fcmp ogt <4 x double> %a, %b
144 %x1 = fcmp ogt <4 x double> %c, %d
145 %y = and <4 x i1> %x0, %x1
146 %res = bitcast <4 x i1> %y to i4
; Test v16i16 - two signed greater-than compares on <16 x i16> operands are
; ANDed as <16 x i1> and the result is bitcast to an i16 bitmask.
150 define i16 @v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i16> %d) {
151 ; SSE2-SSSE3-LABEL: v16i16:
152 ; SSE2-SSSE3: # %bb.0:
153 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm2, %xmm0
154 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm1
155 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm6, %xmm4
156 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
157 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm7, %xmm5
158 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5
159 ; SSE2-SSSE3-NEXT: packsswb %xmm5, %xmm4
160 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
161 ; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
162 ; SSE2-SSSE3-NEXT: retq
164 ; AVX1-LABEL: v16i16:
166 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm4
167 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
168 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
169 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
170 ; AVX1-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm1
171 ; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1
172 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
173 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
174 ; AVX1-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm2
175 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
176 ; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
177 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
178 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
179 ; AVX1-NEXT: vzeroupper
182 ; AVX2-LABEL: v16i16:
184 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
185 ; AVX2-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm1
186 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
187 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
188 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
189 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
190 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
191 ; AVX2-NEXT: vzeroupper
194 ; AVX512F-LABEL: v16i16:
196 ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
197 ; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm1
198 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
199 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
200 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
201 ; AVX512F-NEXT: kmovw %k0, %eax
202 ; AVX512F-NEXT: # kill: def $ax killed $ax killed $eax
203 ; AVX512F-NEXT: vzeroupper
206 ; AVX512BW-LABEL: v16i16:
208 ; AVX512BW-NEXT: vpcmpgtw %ymm1, %ymm0, %k1
209 ; AVX512BW-NEXT: vpcmpgtw %ymm3, %ymm2, %k0 {%k1}
210 ; AVX512BW-NEXT: kmovd %k0, %eax
211 ; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
212 ; AVX512BW-NEXT: vzeroupper
213 ; AVX512BW-NEXT: retq
214 %x0 = icmp sgt <16 x i16> %a, %b
215 %x1 = icmp sgt <16 x i16> %c, %d
216 %y = and <16 x i1> %x0, %x1
217 %res = bitcast <16 x i1> %y to i16
; Test v8i32_and - two signed greater-than compares on <8 x i32> operands are
; ANDed as <8 x i1> and the result is bitcast to an i8 bitmask.
221 define i8 @v8i32_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
222 ; SSE2-SSSE3-LABEL: v8i32_and:
223 ; SSE2-SSSE3: # %bb.0:
224 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
225 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
226 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm4
227 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
228 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
229 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5
230 ; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
231 ; SSE2-SSSE3-NEXT: packsswb %xmm4, %xmm4
232 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
233 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
234 ; SSE2-SSSE3-NEXT: retq
236 ; AVX1-LABEL: v8i32_and:
238 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
239 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
240 ; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm4
241 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
242 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
243 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
244 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
245 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm3
246 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
247 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
248 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
249 ; AVX1-NEXT: vmovmskps %ymm0, %eax
250 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
251 ; AVX1-NEXT: vzeroupper
254 ; AVX2-LABEL: v8i32_and:
256 ; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm2
257 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
258 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
259 ; AVX2-NEXT: vmovmskps %ymm0, %eax
260 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
261 ; AVX2-NEXT: vzeroupper
264 ; AVX512F-LABEL: v8i32_and:
266 ; AVX512F-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
267 ; AVX512F-NEXT: vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
268 ; AVX512F-NEXT: kmovw %k0, %eax
269 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
270 ; AVX512F-NEXT: vzeroupper
273 ; AVX512BW-LABEL: v8i32_and:
275 ; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
276 ; AVX512BW-NEXT: vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
277 ; AVX512BW-NEXT: kmovd %k0, %eax
278 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
279 ; AVX512BW-NEXT: vzeroupper
280 ; AVX512BW-NEXT: retq
281 %x0 = icmp sgt <8 x i32> %a, %b
282 %x1 = icmp sgt <8 x i32> %c, %d
283 %y = and <8 x i1> %x0, %x1
284 %res = bitcast <8 x i1> %y to i8
288 ; We should see through any bitwise logic op.
; Test v8i32_or - same shape as the AND variant but the two signed
; greater-than <8 x i32> compare results are ORed before the bitcast to i8.
290 define i8 @v8i32_or(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
291 ; SSE2-SSSE3-LABEL: v8i32_or:
292 ; SSE2-SSSE3: # %bb.0:
293 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
294 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
295 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm4
296 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm4
297 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
298 ; SSE2-SSSE3-NEXT: por %xmm1, %xmm5
299 ; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
300 ; SSE2-SSSE3-NEXT: packsswb %xmm4, %xmm4
301 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
302 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
303 ; SSE2-SSSE3-NEXT: retq
305 ; AVX1-LABEL: v8i32_or:
307 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
308 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
309 ; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm4
310 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
311 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
312 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
313 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
314 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm3
315 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
316 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
317 ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
318 ; AVX1-NEXT: vmovmskps %ymm0, %eax
319 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
320 ; AVX1-NEXT: vzeroupper
323 ; AVX2-LABEL: v8i32_or:
325 ; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm2
326 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
327 ; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
328 ; AVX2-NEXT: vmovmskps %ymm0, %eax
329 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
330 ; AVX2-NEXT: vzeroupper
333 ; AVX512F-LABEL: v8i32_or:
335 ; AVX512F-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
336 ; AVX512F-NEXT: vpcmpgtd %ymm3, %ymm2, %k1
337 ; AVX512F-NEXT: korw %k1, %k0, %k0
338 ; AVX512F-NEXT: kmovw %k0, %eax
339 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
340 ; AVX512F-NEXT: vzeroupper
343 ; AVX512BW-LABEL: v8i32_or:
345 ; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
346 ; AVX512BW-NEXT: vpcmpgtd %ymm3, %ymm2, %k1
347 ; AVX512BW-NEXT: korw %k1, %k0, %k0
348 ; AVX512BW-NEXT: kmovd %k0, %eax
349 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
350 ; AVX512BW-NEXT: vzeroupper
351 ; AVX512BW-NEXT: retq
352 %x0 = icmp sgt <8 x i32> %a, %b
353 %x1 = icmp sgt <8 x i32> %c, %d
354 %y = or <8 x i1> %x0, %x1
355 %res = bitcast <8 x i1> %y to i8
359 ; We should see through multiple bitwise logic ops.
; Test v8i32_or_and - a chain of two logic ops on <8 x i1> compare results:
; (%a sgt %b) OR (%c slt %d), then AND with (%e eq %f); the final mask is
; bitcast to i8.
361 define i8 @v8i32_or_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d, <8 x i32> %e, <8 x i32> %f) {
362 ; SSE2-SSSE3-LABEL: v8i32_or_and:
363 ; SSE2-SSSE3: # %bb.0:
364 ; SSE2-SSSE3-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
365 ; SSE2-SSSE3-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
366 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
367 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
368 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm7
369 ; SSE2-SSSE3-NEXT: por %xmm1, %xmm7
370 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
371 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm6
372 ; SSE2-SSSE3-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm9
373 ; SSE2-SSSE3-NEXT: pand %xmm6, %xmm9
374 ; SSE2-SSSE3-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm8
375 ; SSE2-SSSE3-NEXT: pand %xmm7, %xmm8
376 ; SSE2-SSSE3-NEXT: packssdw %xmm8, %xmm9
377 ; SSE2-SSSE3-NEXT: packsswb %xmm9, %xmm9
378 ; SSE2-SSSE3-NEXT: pmovmskb %xmm9, %eax
379 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
380 ; SSE2-SSSE3-NEXT: retq
382 ; AVX1-LABEL: v8i32_or_and:
384 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
385 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm7
386 ; AVX1-NEXT: vpcmpgtd %xmm6, %xmm7, %xmm6
387 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
388 ; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm2, %ymm2
389 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
390 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
391 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm6, %xmm3
392 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
393 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
394 ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
395 ; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm1
396 ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm2
397 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
398 ; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm2
399 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
400 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
401 ; AVX1-NEXT: vmovmskps %ymm0, %eax
402 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
403 ; AVX1-NEXT: vzeroupper
406 ; AVX2-LABEL: v8i32_or_and:
408 ; AVX2-NEXT: vpcmpgtd %ymm2, %ymm3, %ymm2
409 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
410 ; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
411 ; AVX2-NEXT: vpcmpeqd %ymm5, %ymm4, %ymm1
412 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
413 ; AVX2-NEXT: vmovmskps %ymm0, %eax
414 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
415 ; AVX2-NEXT: vzeroupper
418 ; AVX512F-LABEL: v8i32_or_and:
420 ; AVX512F-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
421 ; AVX512F-NEXT: vpcmpgtd %ymm2, %ymm3, %k1
422 ; AVX512F-NEXT: korw %k1, %k0, %k1
423 ; AVX512F-NEXT: vpcmpeqd %ymm5, %ymm4, %k0 {%k1}
424 ; AVX512F-NEXT: kmovw %k0, %eax
425 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
426 ; AVX512F-NEXT: vzeroupper
429 ; AVX512BW-LABEL: v8i32_or_and:
431 ; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
432 ; AVX512BW-NEXT: vpcmpgtd %ymm2, %ymm3, %k1
433 ; AVX512BW-NEXT: korw %k1, %k0, %k1
434 ; AVX512BW-NEXT: vpcmpeqd %ymm5, %ymm4, %k0 {%k1}
435 ; AVX512BW-NEXT: kmovd %k0, %eax
436 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
437 ; AVX512BW-NEXT: vzeroupper
438 ; AVX512BW-NEXT: retq
439 %x0 = icmp sgt <8 x i32> %a, %b
440 %x1 = icmp slt <8 x i32> %c, %d
441 %x2 = icmp eq <8 x i32> %e, %f
442 %y = or <8 x i1> %x0, %x1
443 %z = and <8 x i1> %y, %x2
444 %res = bitcast <8 x i1> %z to i8
448 ; PR61104 - peek through a vselect whose true operand is all-ones.
449 ; combineBitcastvxi1 may be called before the vXi1 fold vselect(c,-1,x) --> or(c,x) has been applied.
; Test v8i32_or_vselect - a vector select whose condition is (%a0 eq %a1),
; whose true operand is an all-true <8 x i1> constant and whose false operand
; is (%a2 slt 0); the selected <8 x i1> mask is bitcast to i8.
451 define i8 @v8i32_or_vselect(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
452 ; SSE2-SSSE3-LABEL: v8i32_or_vselect:
453 ; SSE2-SSSE3: # %bb.0:
454 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
455 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm1
456 ; SSE2-SSSE3-NEXT: por %xmm5, %xmm1
457 ; SSE2-SSSE3-NEXT: por %xmm4, %xmm0
458 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
459 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
460 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
461 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
462 ; SSE2-SSSE3-NEXT: retq
464 ; AVX1-LABEL: v8i32_or_vselect:
466 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
467 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
468 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm4, %xmm3
469 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
470 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
471 ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
472 ; AVX1-NEXT: vmovmskps %ymm0, %eax
473 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
474 ; AVX1-NEXT: vzeroupper
477 ; AVX2-LABEL: v8i32_or_vselect:
479 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
480 ; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
481 ; AVX2-NEXT: vmovmskps %ymm0, %eax
482 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
483 ; AVX2-NEXT: vzeroupper
486 ; AVX512F-LABEL: v8i32_or_vselect:
488 ; AVX512F-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
489 ; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
490 ; AVX512F-NEXT: vpcmpgtd %ymm2, %ymm0, %k1
491 ; AVX512F-NEXT: korw %k1, %k0, %k0
492 ; AVX512F-NEXT: kmovw %k0, %eax
493 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
494 ; AVX512F-NEXT: vzeroupper
497 ; AVX512BW-LABEL: v8i32_or_vselect:
499 ; AVX512BW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
500 ; AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0
501 ; AVX512BW-NEXT: vpcmpgtd %ymm2, %ymm0, %k1
502 ; AVX512BW-NEXT: korw %k1, %k0, %k0
503 ; AVX512BW-NEXT: kmovd %k0, %eax
504 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
505 ; AVX512BW-NEXT: vzeroupper
506 ; AVX512BW-NEXT: retq
507 %cmp = icmp eq <8 x i32> %a0, %a1
508 %slt = icmp slt <8 x i32> %a2, zeroinitializer
509 %sel = select <8 x i1> %cmp, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i1> %slt
510 %res = bitcast <8 x i1> %sel to i8
; Test v8i32_or_select - a scalar i1 (%a4) selects between the compare
; (%a0 eq %a2) and an all-zero <8 x i1>; that value is ORed with
; (%a3 slt 0) OR (%a0 eq %a1), and the final mask is bitcast to i8.
; The expected output for every configuration contains a branch on %dil.
514 define i8 @v8i32_or_select(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3, i1 %a4) {
515 ; SSE2-SSSE3-LABEL: v8i32_or_select:
516 ; SSE2-SSSE3: # %bb.0:
517 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm2
518 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm3
519 ; SSE2-SSSE3-NEXT: testb $1, %dil
520 ; SSE2-SSSE3-NEXT: jne .LBB7_1
521 ; SSE2-SSSE3-NEXT: # %bb.2:
522 ; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm0
523 ; SSE2-SSSE3-NEXT: jmp .LBB7_3
524 ; SSE2-SSSE3-NEXT: .LBB7_1:
525 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm5, %xmm1
526 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm0
527 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
528 ; SSE2-SSSE3-NEXT: .LBB7_3:
529 ; SSE2-SSSE3-NEXT: por %xmm3, %xmm7
530 ; SSE2-SSSE3-NEXT: por %xmm2, %xmm6
531 ; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
532 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm6
533 ; SSE2-SSSE3-NEXT: packsswb %xmm6, %xmm6
534 ; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %eax
535 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
536 ; SSE2-SSSE3-NEXT: retq
538 ; AVX1-LABEL: v8i32_or_select:
540 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
541 ; AVX1-NEXT: testb $1, %dil
542 ; AVX1-NEXT: jne .LBB7_1
543 ; AVX1-NEXT: # %bb.2:
544 ; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
545 ; AVX1-NEXT: jmp .LBB7_3
546 ; AVX1-NEXT: .LBB7_1:
547 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
548 ; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm5
549 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2
550 ; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm2, %ymm2
551 ; AVX1-NEXT: .LBB7_3:
552 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
553 ; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm4
554 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
555 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
556 ; AVX1-NEXT: vorps %ymm2, %ymm3, %ymm1
557 ; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
558 ; AVX1-NEXT: vmovmskps %ymm0, %eax
559 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
560 ; AVX1-NEXT: vzeroupper
563 ; AVX2-LABEL: v8i32_or_select:
565 ; AVX2-NEXT: testb $1, %dil
566 ; AVX2-NEXT: jne .LBB7_1
567 ; AVX2-NEXT: # %bb.2:
568 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
569 ; AVX2-NEXT: jmp .LBB7_3
570 ; AVX2-NEXT: .LBB7_1:
571 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm2
572 ; AVX2-NEXT: .LBB7_3:
573 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
574 ; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm1
575 ; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
576 ; AVX2-NEXT: vmovmskps %ymm0, %eax
577 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
578 ; AVX2-NEXT: vzeroupper
581 ; AVX512F-LABEL: v8i32_or_select:
583 ; AVX512F-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
584 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
585 ; AVX512F-NEXT: vpcmpgtd %ymm3, %ymm1, %k1
586 ; AVX512F-NEXT: testb $1, %dil
587 ; AVX512F-NEXT: jne .LBB7_1
588 ; AVX512F-NEXT: # %bb.2:
589 ; AVX512F-NEXT: kxorw %k0, %k0, %k2
590 ; AVX512F-NEXT: jmp .LBB7_3
591 ; AVX512F-NEXT: .LBB7_1:
592 ; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm0, %k2
593 ; AVX512F-NEXT: .LBB7_3:
594 ; AVX512F-NEXT: korw %k0, %k1, %k0
595 ; AVX512F-NEXT: korw %k2, %k0, %k0
596 ; AVX512F-NEXT: kmovw %k0, %eax
597 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
598 ; AVX512F-NEXT: vzeroupper
601 ; AVX512BW-LABEL: v8i32_or_select:
603 ; AVX512BW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
604 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
605 ; AVX512BW-NEXT: vpcmpgtd %ymm3, %ymm1, %k1
606 ; AVX512BW-NEXT: testb $1, %dil
607 ; AVX512BW-NEXT: jne .LBB7_1
608 ; AVX512BW-NEXT: # %bb.2:
609 ; AVX512BW-NEXT: kxorw %k0, %k0, %k2
610 ; AVX512BW-NEXT: jmp .LBB7_3
611 ; AVX512BW-NEXT: .LBB7_1:
612 ; AVX512BW-NEXT: vpcmpeqd %ymm2, %ymm0, %k2
613 ; AVX512BW-NEXT: .LBB7_3:
614 ; AVX512BW-NEXT: korw %k0, %k1, %k0
615 ; AVX512BW-NEXT: korw %k2, %k0, %k0
616 ; AVX512BW-NEXT: kmovd %k0, %eax
617 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
618 ; AVX512BW-NEXT: vzeroupper
619 ; AVX512BW-NEXT: retq
620 %cmp0 = icmp eq <8 x i32> %a0, %a1
621 %cmp1 = icmp eq <8 x i32> %a0, %a2
622 %cmp2 = icmp slt <8 x i32> %a3, zeroinitializer
623 %sel = select i1 %a4, <8 x i1> %cmp1, <8 x i1> zeroinitializer
624 %or0 = or <8 x i1> %cmp2, %cmp0
625 %or1 = or <8 x i1> %or0, %sel
626 %res = bitcast <8 x i1> %or1 to i8
; Test v8f32_and - two ordered greater-than (fcmp ogt) compares on
; <8 x float> operands are ANDed as <8 x i1> and bitcast to an i8 bitmask.
630 define i8 @v8f32_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
631 ; SSE2-SSSE3-LABEL: v8f32_and:
632 ; SSE2-SSSE3: # %bb.0:
633 ; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm2
634 ; SSE2-SSSE3-NEXT: cmpltps %xmm1, %xmm3
635 ; SSE2-SSSE3-NEXT: cmpltps %xmm4, %xmm6
636 ; SSE2-SSSE3-NEXT: andps %xmm2, %xmm6
637 ; SSE2-SSSE3-NEXT: cmpltps %xmm5, %xmm7
638 ; SSE2-SSSE3-NEXT: andps %xmm3, %xmm7
639 ; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
640 ; SSE2-SSSE3-NEXT: packsswb %xmm6, %xmm6
641 ; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %eax
642 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
643 ; SSE2-SSSE3-NEXT: retq
645 ; AVX12-LABEL: v8f32_and:
647 ; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm2
648 ; AVX12-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
649 ; AVX12-NEXT: vandps %ymm2, %ymm0, %ymm0
650 ; AVX12-NEXT: vmovmskps %ymm0, %eax
651 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
652 ; AVX12-NEXT: vzeroupper
655 ; AVX512F-LABEL: v8f32_and:
657 ; AVX512F-NEXT: vcmpltps %ymm0, %ymm1, %k1
658 ; AVX512F-NEXT: vcmpltps %ymm2, %ymm3, %k0 {%k1}
659 ; AVX512F-NEXT: kmovw %k0, %eax
660 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
661 ; AVX512F-NEXT: vzeroupper
664 ; AVX512BW-LABEL: v8f32_and:
666 ; AVX512BW-NEXT: vcmpltps %ymm0, %ymm1, %k1
667 ; AVX512BW-NEXT: vcmpltps %ymm2, %ymm3, %k0 {%k1}
668 ; AVX512BW-NEXT: kmovd %k0, %eax
669 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
670 ; AVX512BW-NEXT: vzeroupper
671 ; AVX512BW-NEXT: retq
672 %x0 = fcmp ogt <8 x float> %a, %b
673 %x1 = fcmp ogt <8 x float> %c, %d
674 %y = and <8 x i1> %x0, %x1
675 %res = bitcast <8 x i1> %y to i8
679 ; We should see through any bitwise logic op.
; Test v8f32_xor - two ordered greater-than (fcmp ogt) compares on
; <8 x float> operands are XORed as <8 x i1> and bitcast to an i8 bitmask.
681 define i8 @v8f32_xor(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
682 ; SSE2-SSSE3-LABEL: v8f32_xor:
683 ; SSE2-SSSE3: # %bb.0:
684 ; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm2
685 ; SSE2-SSSE3-NEXT: cmpltps %xmm1, %xmm3
686 ; SSE2-SSSE3-NEXT: cmpltps %xmm4, %xmm6
687 ; SSE2-SSSE3-NEXT: xorps %xmm2, %xmm6
688 ; SSE2-SSSE3-NEXT: cmpltps %xmm5, %xmm7
689 ; SSE2-SSSE3-NEXT: xorps %xmm3, %xmm7
690 ; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
691 ; SSE2-SSSE3-NEXT: packsswb %xmm6, %xmm6
692 ; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %eax
693 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
694 ; SSE2-SSSE3-NEXT: retq
696 ; AVX12-LABEL: v8f32_xor:
698 ; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm2
699 ; AVX12-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
700 ; AVX12-NEXT: vxorps %ymm2, %ymm0, %ymm0
701 ; AVX12-NEXT: vmovmskps %ymm0, %eax
702 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
703 ; AVX12-NEXT: vzeroupper
706 ; AVX512F-LABEL: v8f32_xor:
708 ; AVX512F-NEXT: vcmpltps %ymm0, %ymm1, %k0
709 ; AVX512F-NEXT: vcmpltps %ymm2, %ymm3, %k1
710 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
711 ; AVX512F-NEXT: kmovw %k0, %eax
712 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
713 ; AVX512F-NEXT: vzeroupper
716 ; AVX512BW-LABEL: v8f32_xor:
718 ; AVX512BW-NEXT: vcmpltps %ymm0, %ymm1, %k0
719 ; AVX512BW-NEXT: vcmpltps %ymm2, %ymm3, %k1
720 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
721 ; AVX512BW-NEXT: kmovd %k0, %eax
722 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
723 ; AVX512BW-NEXT: vzeroupper
724 ; AVX512BW-NEXT: retq
725 %x0 = fcmp ogt <8 x float> %a, %b
726 %x1 = fcmp ogt <8 x float> %c, %d
727 %y = xor <8 x i1> %x0, %x1
728 %res = bitcast <8 x i1> %y to i8
732 ; We should see through multiple bitwise logic ops.
; Test v8f32_xor_and - a chain of two logic ops on <8 x i1> float compare
; results: (%a ugt %b) XOR (%c ueq %d), then AND with (%e ogt %f); the final
; mask is bitcast to i8. The first two compares are unordered predicates.
734 define i8 @v8f32_xor_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d, <8 x float> %e, <8 x float> %f) {
735 ; SSE2-SSSE3-LABEL: v8f32_xor_and:
736 ; SSE2-SSSE3: # %bb.0:
737 ; SSE2-SSSE3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9
738 ; SSE2-SSSE3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8
739 ; SSE2-SSSE3-NEXT: cmpnleps %xmm3, %xmm1
740 ; SSE2-SSSE3-NEXT: cmpnleps %xmm2, %xmm0
741 ; SSE2-SSSE3-NEXT: movaps %xmm5, %xmm2
742 ; SSE2-SSSE3-NEXT: cmpeqps %xmm7, %xmm2
743 ; SSE2-SSSE3-NEXT: cmpunordps %xmm7, %xmm5
744 ; SSE2-SSSE3-NEXT: orps %xmm2, %xmm5
745 ; SSE2-SSSE3-NEXT: xorps %xmm1, %xmm5
746 ; SSE2-SSSE3-NEXT: movaps %xmm4, %xmm1
747 ; SSE2-SSSE3-NEXT: cmpeqps %xmm6, %xmm1
748 ; SSE2-SSSE3-NEXT: cmpunordps %xmm6, %xmm4
749 ; SSE2-SSSE3-NEXT: orps %xmm1, %xmm4
750 ; SSE2-SSSE3-NEXT: xorps %xmm0, %xmm4
751 ; SSE2-SSSE3-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm8
752 ; SSE2-SSSE3-NEXT: andps %xmm4, %xmm8
753 ; SSE2-SSSE3-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm9
754 ; SSE2-SSSE3-NEXT: andps %xmm5, %xmm9
755 ; SSE2-SSSE3-NEXT: packssdw %xmm9, %xmm8
756 ; SSE2-SSSE3-NEXT: packsswb %xmm8, %xmm8
757 ; SSE2-SSSE3-NEXT: pmovmskb %xmm8, %eax
758 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
759 ; SSE2-SSSE3-NEXT: retq
761 ; AVX12-LABEL: v8f32_xor_and:
763 ; AVX12-NEXT: vcmpeq_uqps %ymm3, %ymm2, %ymm2
764 ; AVX12-NEXT: vcmpnleps %ymm1, %ymm0, %ymm0
765 ; AVX12-NEXT: vxorps %ymm2, %ymm0, %ymm0
766 ; AVX12-NEXT: vcmpltps %ymm4, %ymm5, %ymm1
767 ; AVX12-NEXT: vandps %ymm1, %ymm0, %ymm0
768 ; AVX12-NEXT: vmovmskps %ymm0, %eax
769 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
770 ; AVX12-NEXT: vzeroupper
773 ; AVX512F-LABEL: v8f32_xor_and:
775 ; AVX512F-NEXT: vcmpnleps %ymm1, %ymm0, %k0
776 ; AVX512F-NEXT: vcmpeq_uqps %ymm3, %ymm2, %k1
777 ; AVX512F-NEXT: kxorw %k1, %k0, %k1
778 ; AVX512F-NEXT: vcmpltps %ymm4, %ymm5, %k0 {%k1}
779 ; AVX512F-NEXT: kmovw %k0, %eax
780 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
781 ; AVX512F-NEXT: vzeroupper
784 ; AVX512BW-LABEL: v8f32_xor_and:
786 ; AVX512BW-NEXT: vcmpnleps %ymm1, %ymm0, %k0
787 ; AVX512BW-NEXT: vcmpeq_uqps %ymm3, %ymm2, %k1
788 ; AVX512BW-NEXT: kxorw %k1, %k0, %k1
789 ; AVX512BW-NEXT: vcmpltps %ymm4, %ymm5, %k0 {%k1}
790 ; AVX512BW-NEXT: kmovd %k0, %eax
791 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
792 ; AVX512BW-NEXT: vzeroupper
793 ; AVX512BW-NEXT: retq
794 %x0 = fcmp ugt <8 x float> %a, %b
795 %x1 = fcmp ueq <8 x float> %c, %d
796 %x2 = fcmp ogt <8 x float> %e, %f
797 %y = xor <8 x i1> %x0, %x1
798 %z = and <8 x i1> %y, %x2
799 %res = bitcast <8 x i1> %z to i8
; Test v32i8 - two signed greater-than compares on <32 x i8> operands are
; ANDed as <32 x i1> and the result is bitcast to an i32 bitmask.
803 define i32 @v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
804 ; SSE2-SSSE3-LABEL: v32i8:
805 ; SSE2-SSSE3: # %bb.0:
806 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm2, %xmm0
807 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm1
808 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm6, %xmm4
809 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
810 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm7, %xmm5
811 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5
812 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %ecx
813 ; SSE2-SSSE3-NEXT: pmovmskb %xmm5, %eax
814 ; SSE2-SSSE3-NEXT: shll $16, %eax
815 ; SSE2-SSSE3-NEXT: orl %ecx, %eax
816 ; SSE2-SSSE3-NEXT: retq
820 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
821 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
822 ; AVX1-NEXT: vpcmpgtb %xmm4, %xmm5, %xmm4
823 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
824 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1
825 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
826 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm5, %xmm1
827 ; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1
828 ; AVX1-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm2
829 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
830 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
831 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
832 ; AVX1-NEXT: shll $16, %eax
833 ; AVX1-NEXT: orl %ecx, %eax
834 ; AVX1-NEXT: vzeroupper
839 ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
840 ; AVX2-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1
841 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
842 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
843 ; AVX2-NEXT: vzeroupper
846 ; AVX512F-LABEL: v32i8:
848 ; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
849 ; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1
850 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
851 ; AVX512F-NEXT: vpmovmskb %ymm0, %eax
852 ; AVX512F-NEXT: vzeroupper
855 ; AVX512BW-LABEL: v32i8:
857 ; AVX512BW-NEXT: vpcmpgtb %ymm1, %ymm0, %k1
858 ; AVX512BW-NEXT: vpcmpgtb %ymm3, %ymm2, %k0 {%k1}
859 ; AVX512BW-NEXT: kmovd %k0, %eax
860 ; AVX512BW-NEXT: vzeroupper
861 ; AVX512BW-NEXT: retq
862 %x0 = icmp sgt <32 x i8> %a, %b
863 %x1 = icmp sgt <32 x i8> %c, %d
864 %y = and <32 x i1> %x0, %x1
865 %res = bitcast <32 x i1> %y to i32
869 ; PR61683 - ignore upper undef elements
; Test v4i32_concat_undef - a <4 x i1> mask from (%vec ne 0) is widened to
; <8 x i1> by a shufflevector whose upper four mask indices are undef, and the
; widened vector is bitcast to i8.
870 define i8 @v4i32_concat_undef(<4 x i32> %vec) {
871 ; SSE2-SSSE3-LABEL: v4i32_concat_undef:
872 ; SSE2-SSSE3: # %bb.0:
873 ; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm1
874 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
875 ; SSE2-SSSE3-NEXT: movmskps %xmm1, %eax
876 ; SSE2-SSSE3-NEXT: xorl $15, %eax
877 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
878 ; SSE2-SSSE3-NEXT: retq
880 ; AVX12-LABEL: v4i32_concat_undef:
882 ; AVX12-NEXT: vpxor %xmm1, %xmm1, %xmm1
883 ; AVX12-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
884 ; AVX12-NEXT: vmovmskps %xmm0, %eax
885 ; AVX12-NEXT: xorl $15, %eax
886 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
889 ; AVX512F-LABEL: v4i32_concat_undef:
891 ; AVX512F-NEXT: vptestmd %xmm0, %xmm0, %k0
892 ; AVX512F-NEXT: kmovw %k0, %eax
893 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
896 ; AVX512BW-LABEL: v4i32_concat_undef:
898 ; AVX512BW-NEXT: vptestmd %xmm0, %xmm0, %k0
899 ; AVX512BW-NEXT: kmovd %k0, %eax
900 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
901 ; AVX512BW-NEXT: retq
902 %tobool = icmp ne <4 x i32> %vec, zeroinitializer
903 %insertvec = shufflevector <4 x i1> %tobool, <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
904 %res = bitcast <8 x i1> %insertvec to i8
; Test v2i64_concat_undef - a <2 x i1> mask from (%vec eq 0) is widened to
; <8 x i1> by a shufflevector whose upper six mask indices are undef, and the
; widened vector is bitcast to i8.
908 define i8 @v2i64_concat_undef(<2 x i64> %vec) {
909 ; SSE2-SSSE3-LABEL: v2i64_concat_undef:
910 ; SSE2-SSSE3: # %bb.0:
911 ; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm1
912 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
913 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
914 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
915 ; SSE2-SSSE3-NEXT: movmskpd %xmm0, %eax
916 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
917 ; SSE2-SSSE3-NEXT: retq
919 ; AVX12-LABEL: v2i64_concat_undef:
921 ; AVX12-NEXT: vpxor %xmm1, %xmm1, %xmm1
922 ; AVX12-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
923 ; AVX12-NEXT: vmovmskpd %xmm0, %eax
924 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
927 ; AVX512F-LABEL: v2i64_concat_undef:
929 ; AVX512F-NEXT: vptestnmq %xmm0, %xmm0, %k0
930 ; AVX512F-NEXT: kmovw %k0, %eax
931 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
934 ; AVX512BW-LABEL: v2i64_concat_undef:
936 ; AVX512BW-NEXT: vptestnmq %xmm0, %xmm0, %k0
937 ; AVX512BW-NEXT: kmovd %k0, %eax
938 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
939 ; AVX512BW-NEXT: retq
940 %tobool = icmp eq <2 x i64> %vec, zeroinitializer
941 %insertvec = shufflevector <2 x i1> %tobool, <2 x i1> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
942 %res = bitcast <8 x i1> %insertvec to i8
; Test v4f64_concat_undef - a <4 x i1> mask from (%vec ogt 0.0) is widened to
; <8 x i1> by a shufflevector whose upper four mask indices are undef, and the
; widened vector is bitcast to i8.
946 define i8 @v4f64_concat_undef(<4 x double> %vec) {
947 ; SSE2-SSSE3-LABEL: v4f64_concat_undef:
948 ; SSE2-SSSE3: # %bb.0:
949 ; SSE2-SSSE3-NEXT: xorpd %xmm2, %xmm2
950 ; SSE2-SSSE3-NEXT: xorpd %xmm3, %xmm3
951 ; SSE2-SSSE3-NEXT: cmpltpd %xmm1, %xmm3
952 ; SSE2-SSSE3-NEXT: cmpltpd %xmm0, %xmm2
953 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
954 ; SSE2-SSSE3-NEXT: movmskps %xmm2, %eax
955 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
956 ; SSE2-SSSE3-NEXT: retq
958 ; AVX12-LABEL: v4f64_concat_undef:
960 ; AVX12-NEXT: vxorpd %xmm1, %xmm1, %xmm1
961 ; AVX12-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
962 ; AVX12-NEXT: vmovmskpd %ymm0, %eax
963 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
964 ; AVX12-NEXT: vzeroupper
967 ; AVX512F-LABEL: v4f64_concat_undef:
969 ; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
970 ; AVX512F-NEXT: vcmpltpd %ymm0, %ymm1, %k0
971 ; AVX512F-NEXT: kmovw %k0, %eax
972 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
973 ; AVX512F-NEXT: vzeroupper
976 ; AVX512BW-LABEL: v4f64_concat_undef:
978 ; AVX512BW-NEXT: vxorpd %xmm1, %xmm1, %xmm1
979 ; AVX512BW-NEXT: vcmpltpd %ymm0, %ymm1, %k0
980 ; AVX512BW-NEXT: kmovd %k0, %eax
981 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
982 ; AVX512BW-NEXT: vzeroupper
983 ; AVX512BW-NEXT: retq
984 %tobool = fcmp ogt <4 x double> %vec, zeroinitializer
985 %insertvec = shufflevector <4 x i1> %tobool, <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
986 %res = bitcast <8 x i1> %insertvec to i8