1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSE2 | FileCheck %s --check-prefix=SSE2-SSSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSSE3 | FileCheck %s --check-prefix=SSE2-SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512F
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefix=AVX512BW
; AND of two signed-greater-than compares of <4 x i64> operands (%a > %b and
; %c > %d), with the <4 x i1> result bitcast to an i4 bitmask.
; Without AVX-512 the checks expect the mask to be read with a movmsk
; instruction; with AVX-512 they expect the second compare to be predicated on
; the first ({%k1}) and the result moved out of a mask register.
9 define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
10 ; SSE2-SSSE3-LABEL: v4i64:
11 ; SSE2-SSSE3: # %bb.0:
12 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,2147483648]
13 ; SSE2-SSSE3-NEXT: pxor %xmm9, %xmm3
14 ; SSE2-SSSE3-NEXT: pxor %xmm9, %xmm1
15 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm10
16 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm10
17 ; SSE2-SSSE3-NEXT: pxor %xmm9, %xmm2
18 ; SSE2-SSSE3-NEXT: pxor %xmm9, %xmm0
19 ; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm8
20 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm8
21 ; SSE2-SSSE3-NEXT: movdqa %xmm8, %xmm11
22 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm11 = xmm11[0,2],xmm10[0,2]
23 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm1
24 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
25 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
26 ; SSE2-SSSE3-NEXT: andps %xmm11, %xmm0
27 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm8 = xmm8[1,3],xmm10[1,3]
28 ; SSE2-SSSE3-NEXT: orps %xmm0, %xmm8
29 ; SSE2-SSSE3-NEXT: pxor %xmm9, %xmm7
30 ; SSE2-SSSE3-NEXT: pxor %xmm9, %xmm5
31 ; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm0
32 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm0
33 ; SSE2-SSSE3-NEXT: pxor %xmm9, %xmm6
34 ; SSE2-SSSE3-NEXT: pxor %xmm9, %xmm4
35 ; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
36 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm1
37 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm2
38 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[0,2]
39 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm7, %xmm5
40 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm4
41 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3]
42 ; SSE2-SSSE3-NEXT: andps %xmm2, %xmm4
43 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm0[1,3]
44 ; SSE2-SSSE3-NEXT: orps %xmm4, %xmm1
45 ; SSE2-SSSE3-NEXT: andps %xmm8, %xmm1
46 ; SSE2-SSSE3-NEXT: movmskps %xmm1, %eax
47 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
48 ; SSE2-SSSE3-NEXT: retq
52 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
53 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
54 ; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
55 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
56 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
57 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
58 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
59 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
60 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
61 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
62 ; AVX1-NEXT: vandpd %ymm2, %ymm0, %ymm0
63 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
64 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
65 ; AVX1-NEXT: vzeroupper
70 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
71 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
72 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
73 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
74 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
75 ; AVX2-NEXT: vzeroupper
78 ; AVX512F-LABEL: v4i64:
80 ; AVX512F-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
81 ; AVX512F-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
82 ; AVX512F-NEXT: kmovw %k0, %eax
83 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
84 ; AVX512F-NEXT: vzeroupper
87 ; AVX512BW-LABEL: v4i64:
89 ; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
90 ; AVX512BW-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
91 ; AVX512BW-NEXT: kmovd %k0, %eax
92 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
93 ; AVX512BW-NEXT: vzeroupper
95 %x0 = icmp sgt <4 x i64> %a, %b
96 %x1 = icmp sgt <4 x i64> %c, %d
97 %y = and <4 x i1> %x0, %x1
98 %res = bitcast <4 x i1> %y to i4
; AND of two ordered-greater-than compares of <4 x double> operands, with the
; <4 x i1> result bitcast to an i4 bitmask.
; Every configuration is expected to emit the compares in cmplt form; the
; AVX-512 runs predicate the second compare on the first via {%k1}.
102 define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
103 ; SSE2-SSSE3-LABEL: v4f64:
104 ; SSE2-SSSE3: # %bb.0:
105 ; SSE2-SSSE3-NEXT: cmpltpd %xmm1, %xmm3
106 ; SSE2-SSSE3-NEXT: cmpltpd %xmm0, %xmm2
107 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
108 ; SSE2-SSSE3-NEXT: cmpltpd %xmm5, %xmm7
109 ; SSE2-SSSE3-NEXT: cmpltpd %xmm4, %xmm6
110 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
111 ; SSE2-SSSE3-NEXT: andps %xmm2, %xmm6
112 ; SSE2-SSSE3-NEXT: movmskps %xmm6, %eax
113 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
114 ; SSE2-SSSE3-NEXT: retq
116 ; AVX12-LABEL: v4f64:
118 ; AVX12-NEXT: vcmpltpd %ymm2, %ymm3, %ymm2
119 ; AVX12-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
120 ; AVX12-NEXT: vandpd %ymm2, %ymm0, %ymm0
121 ; AVX12-NEXT: vmovmskpd %ymm0, %eax
122 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
123 ; AVX12-NEXT: vzeroupper
126 ; AVX512F-LABEL: v4f64:
128 ; AVX512F-NEXT: vcmpltpd %ymm0, %ymm1, %k1
129 ; AVX512F-NEXT: vcmpltpd %ymm2, %ymm3, %k0 {%k1}
130 ; AVX512F-NEXT: kmovw %k0, %eax
131 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
132 ; AVX512F-NEXT: vzeroupper
135 ; AVX512BW-LABEL: v4f64:
137 ; AVX512BW-NEXT: vcmpltpd %ymm0, %ymm1, %k1
138 ; AVX512BW-NEXT: vcmpltpd %ymm2, %ymm3, %k0 {%k1}
139 ; AVX512BW-NEXT: kmovd %k0, %eax
140 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
141 ; AVX512BW-NEXT: vzeroupper
142 ; AVX512BW-NEXT: retq
143 %x0 = fcmp ogt <4 x double> %a, %b
144 %x1 = fcmp ogt <4 x double> %c, %d
145 %y = and <4 x i1> %x0, %x1
146 %res = bitcast <4 x i1> %y to i4
; AND of two signed-greater-than compares of <16 x i16> operands, with the
; <16 x i1> result bitcast to an i16 bitmask.
; The SSE and AVX checks narrow the word-sized compare results to bytes with
; packsswb and read the mask with pmovmskb. The +avx512f run without
; +avx512bw instead sign-extends to dwords (vpmovsxwd) and uses vptestmd; with
; +avx512bw the compares write mask registers directly.
150 define i16 @v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i16> %d) {
151 ; SSE2-SSSE3-LABEL: v16i16:
152 ; SSE2-SSSE3: # %bb.0:
153 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm1
154 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm2, %xmm0
155 ; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm0
156 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm7, %xmm5
157 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm6, %xmm4
158 ; SSE2-SSSE3-NEXT: packsswb %xmm5, %xmm4
159 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
160 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
161 ; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
162 ; SSE2-SSSE3-NEXT: retq
164 ; AVX1-LABEL: v16i16:
166 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm4
167 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
168 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
169 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
170 ; AVX1-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm1
171 ; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1
172 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
173 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
174 ; AVX1-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm2
175 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
176 ; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
177 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
178 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
179 ; AVX1-NEXT: vzeroupper
182 ; AVX2-LABEL: v16i16:
184 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
185 ; AVX2-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm1
186 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
187 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
188 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
189 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
190 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
191 ; AVX2-NEXT: vzeroupper
194 ; AVX512F-LABEL: v16i16:
196 ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
197 ; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm1
198 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
199 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
200 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
201 ; AVX512F-NEXT: kmovw %k0, %eax
202 ; AVX512F-NEXT: # kill: def $ax killed $ax killed $eax
203 ; AVX512F-NEXT: vzeroupper
206 ; AVX512BW-LABEL: v16i16:
208 ; AVX512BW-NEXT: vpcmpgtw %ymm1, %ymm0, %k1
209 ; AVX512BW-NEXT: vpcmpgtw %ymm3, %ymm2, %k0 {%k1}
210 ; AVX512BW-NEXT: kmovd %k0, %eax
211 ; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
212 ; AVX512BW-NEXT: vzeroupper
213 ; AVX512BW-NEXT: retq
214 %x0 = icmp sgt <16 x i16> %a, %b
215 %x1 = icmp sgt <16 x i16> %c, %d
216 %y = and <16 x i1> %x0, %x1
217 %res = bitcast <16 x i1> %y to i16
; AND of two signed-greater-than compares of <8 x i32> operands, with the
; <8 x i1> result bitcast to an i8 bitmask.
; The SSE checks pack the compare results down to bytes before pmovmskb, the
; AVX checks read the mask with vmovmskps, and the AVX-512 checks predicate
; the second compare on the first compare's mask ({%k1}).
221 define i8 @v8i32_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
222 ; SSE2-SSSE3-LABEL: v8i32_and:
223 ; SSE2-SSSE3: # %bb.0:
224 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
225 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
226 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
227 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
228 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm4
229 ; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
230 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
231 ; SSE2-SSSE3-NEXT: packsswb %xmm4, %xmm4
232 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
233 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
234 ; SSE2-SSSE3-NEXT: retq
236 ; AVX1-LABEL: v8i32_and:
238 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
239 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
240 ; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm4
241 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
242 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
243 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
244 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
245 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm3
246 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
247 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
248 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
249 ; AVX1-NEXT: vmovmskps %ymm0, %eax
250 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
251 ; AVX1-NEXT: vzeroupper
254 ; AVX2-LABEL: v8i32_and:
256 ; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm2
257 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
258 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
259 ; AVX2-NEXT: vmovmskps %ymm0, %eax
260 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
261 ; AVX2-NEXT: vzeroupper
264 ; AVX512F-LABEL: v8i32_and:
266 ; AVX512F-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
267 ; AVX512F-NEXT: vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
268 ; AVX512F-NEXT: kmovw %k0, %eax
269 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
270 ; AVX512F-NEXT: vzeroupper
273 ; AVX512BW-LABEL: v8i32_and:
275 ; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
276 ; AVX512BW-NEXT: vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
277 ; AVX512BW-NEXT: kmovd %k0, %eax
278 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
279 ; AVX512BW-NEXT: vzeroupper
280 ; AVX512BW-NEXT: retq
281 %x0 = icmp sgt <8 x i32> %a, %b
282 %x1 = icmp sgt <8 x i32> %c, %d
283 %y = and <8 x i1> %x0, %x1
284 %res = bitcast <8 x i1> %y to i8
288 ; We should see through any bitwise logic op.
; Here the two signed-greater-than compares of <8 x i32> operands are combined
; with OR before the <8 x i1> result is bitcast to an i8 bitmask.
; The AVX-512 checks expect two unpredicated compares joined by korw; the
; other configurations OR the vector compare results before extracting the mask.
290 define i8 @v8i32_or(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
291 ; SSE2-SSSE3-LABEL: v8i32_or:
292 ; SSE2-SSSE3: # %bb.0:
293 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
294 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
295 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
296 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
297 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm4
298 ; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
299 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm4
300 ; SSE2-SSSE3-NEXT: packsswb %xmm4, %xmm4
301 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
302 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
303 ; SSE2-SSSE3-NEXT: retq
305 ; AVX1-LABEL: v8i32_or:
307 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
308 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
309 ; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm4
310 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
311 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
312 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
313 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
314 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm3
315 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
316 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
317 ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
318 ; AVX1-NEXT: vmovmskps %ymm0, %eax
319 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
320 ; AVX1-NEXT: vzeroupper
323 ; AVX2-LABEL: v8i32_or:
325 ; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm2
326 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
327 ; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
328 ; AVX2-NEXT: vmovmskps %ymm0, %eax
329 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
330 ; AVX2-NEXT: vzeroupper
333 ; AVX512F-LABEL: v8i32_or:
335 ; AVX512F-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
336 ; AVX512F-NEXT: vpcmpgtd %ymm3, %ymm2, %k1
337 ; AVX512F-NEXT: korw %k1, %k0, %k0
338 ; AVX512F-NEXT: kmovw %k0, %eax
339 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
340 ; AVX512F-NEXT: vzeroupper
343 ; AVX512BW-LABEL: v8i32_or:
345 ; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
346 ; AVX512BW-NEXT: vpcmpgtd %ymm3, %ymm2, %k1
347 ; AVX512BW-NEXT: korw %k1, %k0, %k0
348 ; AVX512BW-NEXT: kmovd %k0, %eax
349 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
350 ; AVX512BW-NEXT: vzeroupper
351 ; AVX512BW-NEXT: retq
352 %x0 = icmp sgt <8 x i32> %a, %b
353 %x1 = icmp sgt <8 x i32> %c, %d
354 %y = or <8 x i1> %x0, %x1
355 %res = bitcast <8 x i1> %y to i8
359 ; We should see through multiple bitwise logic ops.
; Computes ((%a sgt %b) | (%c slt %d)) & (%e eq %f) on <8 x i32> operands and
; bitcasts the <8 x i1> result to an i8 bitmask.
; The SSE checks take the operands of the equality compare from the stack.
; The AVX-512 checks fold the final AND into the equality compare as a {%k1}
; predicate on the korw result.
361 define i8 @v8i32_or_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d, <8 x i32> %e, <8 x i32> %f) {
362 ; SSE2-SSSE3-LABEL: v8i32_or_and:
363 ; SSE2-SSSE3: # %bb.0:
364 ; SSE2-SSSE3-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
365 ; SSE2-SSSE3-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
366 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
367 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
368 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
369 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm7
370 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
371 ; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
372 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm6
373 ; SSE2-SSSE3-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm9
374 ; SSE2-SSSE3-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm8
375 ; SSE2-SSSE3-NEXT: packssdw %xmm9, %xmm8
376 ; SSE2-SSSE3-NEXT: pand %xmm6, %xmm8
377 ; SSE2-SSSE3-NEXT: packsswb %xmm8, %xmm8
378 ; SSE2-SSSE3-NEXT: pmovmskb %xmm8, %eax
379 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
380 ; SSE2-SSSE3-NEXT: retq
382 ; AVX1-LABEL: v8i32_or_and:
384 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
385 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm7
386 ; AVX1-NEXT: vpcmpgtd %xmm6, %xmm7, %xmm6
387 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
388 ; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm2, %ymm2
389 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
390 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
391 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm6, %xmm3
392 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
393 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
394 ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
395 ; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm1
396 ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm2
397 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
398 ; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm2
399 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
400 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
401 ; AVX1-NEXT: vmovmskps %ymm0, %eax
402 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
403 ; AVX1-NEXT: vzeroupper
406 ; AVX2-LABEL: v8i32_or_and:
408 ; AVX2-NEXT: vpcmpgtd %ymm2, %ymm3, %ymm2
409 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
410 ; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
411 ; AVX2-NEXT: vpcmpeqd %ymm5, %ymm4, %ymm1
412 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
413 ; AVX2-NEXT: vmovmskps %ymm0, %eax
414 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
415 ; AVX2-NEXT: vzeroupper
418 ; AVX512F-LABEL: v8i32_or_and:
420 ; AVX512F-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
421 ; AVX512F-NEXT: vpcmpgtd %ymm2, %ymm3, %k1
422 ; AVX512F-NEXT: korw %k1, %k0, %k1
423 ; AVX512F-NEXT: vpcmpeqd %ymm5, %ymm4, %k0 {%k1}
424 ; AVX512F-NEXT: kmovw %k0, %eax
425 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
426 ; AVX512F-NEXT: vzeroupper
429 ; AVX512BW-LABEL: v8i32_or_and:
431 ; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
432 ; AVX512BW-NEXT: vpcmpgtd %ymm2, %ymm3, %k1
433 ; AVX512BW-NEXT: korw %k1, %k0, %k1
434 ; AVX512BW-NEXT: vpcmpeqd %ymm5, %ymm4, %k0 {%k1}
435 ; AVX512BW-NEXT: kmovd %k0, %eax
436 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
437 ; AVX512BW-NEXT: vzeroupper
438 ; AVX512BW-NEXT: retq
439 %x0 = icmp sgt <8 x i32> %a, %b
440 %x1 = icmp slt <8 x i32> %c, %d
441 %x2 = icmp eq <8 x i32> %e, %f
442 %y = or <8 x i1> %x0, %x1
443 %z = and <8 x i1> %y, %x2
444 %res = bitcast <8 x i1> %z to i8
; AND of two ordered-greater-than compares of <8 x float> operands, with the
; <8 x i1> result bitcast to an i8 bitmask.
; The compares are expected in cmplt form in every configuration; the AVX-512
; runs predicate the second compare on the first via {%k1}.
448 define i8 @v8f32_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
449 ; SSE2-SSSE3-LABEL: v8f32_and:
450 ; SSE2-SSSE3: # %bb.0:
451 ; SSE2-SSSE3-NEXT: cmpltps %xmm1, %xmm3
452 ; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm2
453 ; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
454 ; SSE2-SSSE3-NEXT: cmpltps %xmm5, %xmm7
455 ; SSE2-SSSE3-NEXT: cmpltps %xmm4, %xmm6
456 ; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
457 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm6
458 ; SSE2-SSSE3-NEXT: packsswb %xmm6, %xmm6
459 ; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %eax
460 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
461 ; SSE2-SSSE3-NEXT: retq
463 ; AVX12-LABEL: v8f32_and:
465 ; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm2
466 ; AVX12-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
467 ; AVX12-NEXT: vandps %ymm2, %ymm0, %ymm0
468 ; AVX12-NEXT: vmovmskps %ymm0, %eax
469 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
470 ; AVX12-NEXT: vzeroupper
473 ; AVX512F-LABEL: v8f32_and:
475 ; AVX512F-NEXT: vcmpltps %ymm0, %ymm1, %k1
476 ; AVX512F-NEXT: vcmpltps %ymm2, %ymm3, %k0 {%k1}
477 ; AVX512F-NEXT: kmovw %k0, %eax
478 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
479 ; AVX512F-NEXT: vzeroupper
482 ; AVX512BW-LABEL: v8f32_and:
484 ; AVX512BW-NEXT: vcmpltps %ymm0, %ymm1, %k1
485 ; AVX512BW-NEXT: vcmpltps %ymm2, %ymm3, %k0 {%k1}
486 ; AVX512BW-NEXT: kmovd %k0, %eax
487 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
488 ; AVX512BW-NEXT: vzeroupper
489 ; AVX512BW-NEXT: retq
490 %x0 = fcmp ogt <8 x float> %a, %b
491 %x1 = fcmp ogt <8 x float> %c, %d
492 %y = and <8 x i1> %x0, %x1
493 %res = bitcast <8 x i1> %y to i8
497 ; We should see through any bitwise logic op.
; Here the two ordered-greater-than compares of <8 x float> operands are
; combined with XOR before the <8 x i1> result is bitcast to an i8 bitmask.
; The AVX-512 checks expect two unpredicated compares joined by kxorw.
499 define i8 @v8f32_xor(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
500 ; SSE2-SSSE3-LABEL: v8f32_xor:
501 ; SSE2-SSSE3: # %bb.0:
502 ; SSE2-SSSE3-NEXT: cmpltps %xmm1, %xmm3
503 ; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm2
504 ; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
505 ; SSE2-SSSE3-NEXT: cmpltps %xmm5, %xmm7
506 ; SSE2-SSSE3-NEXT: cmpltps %xmm4, %xmm6
507 ; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
508 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm6
509 ; SSE2-SSSE3-NEXT: packsswb %xmm6, %xmm6
510 ; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %eax
511 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
512 ; SSE2-SSSE3-NEXT: retq
514 ; AVX12-LABEL: v8f32_xor:
516 ; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm2
517 ; AVX12-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
518 ; AVX12-NEXT: vxorps %ymm2, %ymm0, %ymm0
519 ; AVX12-NEXT: vmovmskps %ymm0, %eax
520 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
521 ; AVX12-NEXT: vzeroupper
524 ; AVX512F-LABEL: v8f32_xor:
526 ; AVX512F-NEXT: vcmpltps %ymm0, %ymm1, %k0
527 ; AVX512F-NEXT: vcmpltps %ymm2, %ymm3, %k1
528 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
529 ; AVX512F-NEXT: kmovw %k0, %eax
530 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
531 ; AVX512F-NEXT: vzeroupper
534 ; AVX512BW-LABEL: v8f32_xor:
536 ; AVX512BW-NEXT: vcmpltps %ymm0, %ymm1, %k0
537 ; AVX512BW-NEXT: vcmpltps %ymm2, %ymm3, %k1
538 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
539 ; AVX512BW-NEXT: kmovd %k0, %eax
540 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
541 ; AVX512BW-NEXT: vzeroupper
542 ; AVX512BW-NEXT: retq
543 %x0 = fcmp ogt <8 x float> %a, %b
544 %x1 = fcmp ogt <8 x float> %c, %d
545 %y = xor <8 x i1> %x0, %x1
546 %res = bitcast <8 x i1> %y to i8
550 ; We should see through multiple bitwise logic ops.
; Computes ((%a ugt %b) ^ (%c ueq %d)) & (%e ogt %f) on <8 x float> operands
; and bitcasts the <8 x i1> result to an i8 bitmask.
; The SSE checks build the ueq compare from cmpeqps and cmpunordps joined by
; orps, while the AVX and AVX-512 checks use a single vcmpeq_uqps. The AVX-512
; checks fold the final AND into the last compare as a {%k1} predicate.
552 define i8 @v8f32_xor_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d, <8 x float> %e, <8 x float> %f) {
553 ; SSE2-SSSE3-LABEL: v8f32_xor_and:
554 ; SSE2-SSSE3: # %bb.0:
555 ; SSE2-SSSE3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8
556 ; SSE2-SSSE3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9
557 ; SSE2-SSSE3-NEXT: cmpnleps %xmm3, %xmm1
558 ; SSE2-SSSE3-NEXT: cmpnleps %xmm2, %xmm0
559 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
560 ; SSE2-SSSE3-NEXT: movaps %xmm5, %xmm1
561 ; SSE2-SSSE3-NEXT: cmpeqps %xmm7, %xmm1
562 ; SSE2-SSSE3-NEXT: cmpunordps %xmm7, %xmm5
563 ; SSE2-SSSE3-NEXT: orps %xmm1, %xmm5
564 ; SSE2-SSSE3-NEXT: movaps %xmm4, %xmm1
565 ; SSE2-SSSE3-NEXT: cmpeqps %xmm6, %xmm1
566 ; SSE2-SSSE3-NEXT: cmpunordps %xmm6, %xmm4
567 ; SSE2-SSSE3-NEXT: orps %xmm1, %xmm4
568 ; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
569 ; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm4
570 ; SSE2-SSSE3-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm9
571 ; SSE2-SSSE3-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm8
572 ; SSE2-SSSE3-NEXT: packssdw %xmm9, %xmm8
573 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm8
574 ; SSE2-SSSE3-NEXT: packsswb %xmm8, %xmm8
575 ; SSE2-SSSE3-NEXT: pmovmskb %xmm8, %eax
576 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
577 ; SSE2-SSSE3-NEXT: retq
579 ; AVX12-LABEL: v8f32_xor_and:
581 ; AVX12-NEXT: vcmpeq_uqps %ymm3, %ymm2, %ymm2
582 ; AVX12-NEXT: vcmpnleps %ymm1, %ymm0, %ymm0
583 ; AVX12-NEXT: vxorps %ymm2, %ymm0, %ymm0
584 ; AVX12-NEXT: vcmpltps %ymm4, %ymm5, %ymm1
585 ; AVX12-NEXT: vandps %ymm1, %ymm0, %ymm0
586 ; AVX12-NEXT: vmovmskps %ymm0, %eax
587 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
588 ; AVX12-NEXT: vzeroupper
591 ; AVX512F-LABEL: v8f32_xor_and:
593 ; AVX512F-NEXT: vcmpnleps %ymm1, %ymm0, %k0
594 ; AVX512F-NEXT: vcmpeq_uqps %ymm3, %ymm2, %k1
595 ; AVX512F-NEXT: kxorw %k1, %k0, %k1
596 ; AVX512F-NEXT: vcmpltps %ymm4, %ymm5, %k0 {%k1}
597 ; AVX512F-NEXT: kmovw %k0, %eax
598 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
599 ; AVX512F-NEXT: vzeroupper
602 ; AVX512BW-LABEL: v8f32_xor_and:
604 ; AVX512BW-NEXT: vcmpnleps %ymm1, %ymm0, %k0
605 ; AVX512BW-NEXT: vcmpeq_uqps %ymm3, %ymm2, %k1
606 ; AVX512BW-NEXT: kxorw %k1, %k0, %k1
607 ; AVX512BW-NEXT: vcmpltps %ymm4, %ymm5, %k0 {%k1}
608 ; AVX512BW-NEXT: kmovd %k0, %eax
609 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
610 ; AVX512BW-NEXT: vzeroupper
611 ; AVX512BW-NEXT: retq
612 %x0 = fcmp ugt <8 x float> %a, %b
613 %x1 = fcmp ueq <8 x float> %c, %d
614 %x2 = fcmp ogt <8 x float> %e, %f
615 %y = xor <8 x i1> %x0, %x1
616 %z = and <8 x i1> %y, %x2
617 %res = bitcast <8 x i1> %z to i8
621 define i32 @v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
622 ; SSE2-SSSE3-LABEL: v32i8:
623 ; SSE2-SSSE3: # %bb.0:
624 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm2, %xmm0
625 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm1
626 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm6, %xmm4
627 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
628 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm7, %xmm5
629 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5
630 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %ecx
631 ; SSE2-SSSE3-NEXT: pmovmskb %xmm5, %eax
632 ; SSE2-SSSE3-NEXT: shll $16, %eax
633 ; SSE2-SSSE3-NEXT: orl %ecx, %eax
634 ; SSE2-SSSE3-NEXT: retq
638 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
639 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
640 ; AVX1-NEXT: vpcmpgtb %xmm4, %xmm5, %xmm4
641 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
642 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1
643 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
644 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm5, %xmm1
645 ; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1
646 ; AVX1-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm2
647 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
648 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
649 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
650 ; AVX1-NEXT: shll $16, %eax
651 ; AVX1-NEXT: orl %ecx, %eax
652 ; AVX1-NEXT: vzeroupper
657 ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
658 ; AVX2-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1
659 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
660 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
661 ; AVX2-NEXT: vzeroupper
664 ; AVX512F-LABEL: v32i8:
666 ; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
667 ; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1
668 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
669 ; AVX512F-NEXT: vpmovmskb %ymm0, %eax
670 ; AVX512F-NEXT: vzeroupper
673 ; AVX512BW-LABEL: v32i8:
675 ; AVX512BW-NEXT: vpcmpgtb %ymm1, %ymm0, %k1
676 ; AVX512BW-NEXT: vpcmpgtb %ymm3, %ymm2, %k0 {%k1}
677 ; AVX512BW-NEXT: kmovd %k0, %eax
678 ; AVX512BW-NEXT: vzeroupper
679 ; AVX512BW-NEXT: retq
680 %x0 = icmp sgt <32 x i8> %a, %b
681 %x1 = icmp sgt <32 x i8> %c, %d
682 %y = and <32 x i1> %x0, %x1
683 %res = bitcast <32 x i1> %y to i32