1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSE2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSSE3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW
; v4i64 - two signed greater-than compares of <4 x i64> vectors are ANDed as
; <4 x i1> and the resulting mask is bitcast to a scalar i4.
; The check lines below are autogenerated (see the note on the first line of
; this file); regenerate them with the update script instead of hand-editing.
; In the AVX-512 checks the second compare is issued under the k-mask written
; by the first compare, so no separate AND instruction appears there.
9 define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
10 ; SSE2-SSSE3-LABEL: v4i64:
11 ; SSE2-SSSE3: # %bb.0:
12 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,2147483648]
13 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm3
14 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm1
15 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm9
16 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm9
17 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm1
18 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
19 ; SSE2-SSSE3-NEXT: pand %xmm9, %xmm1
20 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm9[1,1,3,3]
21 ; SSE2-SSSE3-NEXT: por %xmm1, %xmm3
22 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm2
23 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm0
24 ; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm1
25 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
26 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
27 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
28 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
29 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
30 ; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
31 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
32 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm7
33 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm5
34 ; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm1
35 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm1
36 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm7, %xmm5
37 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
38 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
39 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
40 ; SSE2-SSSE3-NEXT: por %xmm2, %xmm1
41 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm6
42 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm4
43 ; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
44 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm2
45 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm4
46 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
47 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
48 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
49 ; SSE2-SSSE3-NEXT: por %xmm3, %xmm2
50 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
51 ; SSE2-SSSE3-NEXT: andps %xmm0, %xmm2
52 ; SSE2-SSSE3-NEXT: movmskps %xmm2, %eax
53 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
54 ; SSE2-SSSE3-NEXT: retq
58 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
59 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
60 ; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
61 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
62 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
63 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
64 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
65 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
66 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
67 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
68 ; AVX1-NEXT: vandpd %ymm2, %ymm0, %ymm0
69 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
70 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
71 ; AVX1-NEXT: vzeroupper
76 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
77 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
78 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
79 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
80 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
81 ; AVX2-NEXT: vzeroupper
84 ; AVX512F-LABEL: v4i64:
86 ; AVX512F-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
87 ; AVX512F-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
88 ; AVX512F-NEXT: kmovw %k0, %eax
89 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
90 ; AVX512F-NEXT: vzeroupper
93 ; AVX512BW-LABEL: v4i64:
95 ; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
96 ; AVX512BW-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
97 ; AVX512BW-NEXT: kmovd %k0, %eax
98 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
99 ; AVX512BW-NEXT: vzeroupper
100 ; AVX512BW-NEXT: retq
; IR under test - compare both operand pairs, AND the masks, bitcast to i4.
101 %x0 = icmp sgt <4 x i64> %a, %b
102 %x1 = icmp sgt <4 x i64> %c, %d
103 %y = and <4 x i1> %x0, %x1
104 %res = bitcast <4 x i1> %y to i4
; v4f64 - two ordered greater-than compares of <4 x double> vectors are ANDed
; as <4 x i1> and the resulting mask is bitcast to a scalar i4.
; Every checked lowering uses a less-than compare mnemonic (cmpltpd or
; vcmpltpd) to implement the ogt predicate.
108 define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
109 ; SSE2-SSSE3-LABEL: v4f64:
110 ; SSE2-SSSE3: # %bb.0:
111 ; SSE2-SSSE3-NEXT: cmpltpd %xmm1, %xmm3
112 ; SSE2-SSSE3-NEXT: cmpltpd %xmm0, %xmm2
113 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
114 ; SSE2-SSSE3-NEXT: cmpltpd %xmm5, %xmm7
115 ; SSE2-SSSE3-NEXT: cmpltpd %xmm4, %xmm6
116 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
117 ; SSE2-SSSE3-NEXT: andps %xmm2, %xmm6
118 ; SSE2-SSSE3-NEXT: movmskps %xmm6, %eax
119 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
120 ; SSE2-SSSE3-NEXT: retq
122 ; AVX12-LABEL: v4f64:
124 ; AVX12-NEXT: vcmpltpd %ymm2, %ymm3, %ymm2
125 ; AVX12-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
126 ; AVX12-NEXT: vandpd %ymm2, %ymm0, %ymm0
127 ; AVX12-NEXT: vmovmskpd %ymm0, %eax
128 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
129 ; AVX12-NEXT: vzeroupper
132 ; AVX512F-LABEL: v4f64:
134 ; AVX512F-NEXT: vcmpltpd %ymm0, %ymm1, %k1
135 ; AVX512F-NEXT: vcmpltpd %ymm2, %ymm3, %k0 {%k1}
136 ; AVX512F-NEXT: kmovw %k0, %eax
137 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
138 ; AVX512F-NEXT: vzeroupper
141 ; AVX512BW-LABEL: v4f64:
143 ; AVX512BW-NEXT: vcmpltpd %ymm0, %ymm1, %k1
144 ; AVX512BW-NEXT: vcmpltpd %ymm2, %ymm3, %k0 {%k1}
145 ; AVX512BW-NEXT: kmovd %k0, %eax
146 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
147 ; AVX512BW-NEXT: vzeroupper
148 ; AVX512BW-NEXT: retq
; IR under test - compare both operand pairs, AND the masks, bitcast to i4.
149 %x0 = fcmp ogt <4 x double> %a, %b
150 %x1 = fcmp ogt <4 x double> %c, %d
151 %y = and <4 x i1> %x0, %x1
152 %res = bitcast <4 x i1> %y to i4
; v16i16 - two signed greater-than compares of <16 x i16> vectors are ANDed as
; <16 x i1> and the resulting mask is bitcast to a scalar i16.
; The two AVX-512 configurations are checked differently. Without avx512bw the
; compares stay in ymm registers and the mask is formed afterwards with
; vpmovsxwd + vptestmd; with avx512bw the compares write k registers directly.
156 define i16 @v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i16> %d) {
157 ; SSE2-SSSE3-LABEL: v16i16:
158 ; SSE2-SSSE3: # %bb.0:
159 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm1
160 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm2, %xmm0
161 ; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm0
162 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm7, %xmm5
163 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm6, %xmm4
164 ; SSE2-SSSE3-NEXT: packsswb %xmm5, %xmm4
165 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
166 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
167 ; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
168 ; SSE2-SSSE3-NEXT: retq
170 ; AVX1-LABEL: v16i16:
172 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm4
173 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
174 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
175 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
176 ; AVX1-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm1
177 ; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1
178 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
179 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
180 ; AVX1-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm2
181 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
182 ; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
183 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
184 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
185 ; AVX1-NEXT: vzeroupper
188 ; AVX2-LABEL: v16i16:
190 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
191 ; AVX2-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm1
192 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
193 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
194 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
195 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
196 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
197 ; AVX2-NEXT: vzeroupper
200 ; AVX512F-LABEL: v16i16:
202 ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
203 ; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm1
204 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
205 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
206 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
207 ; AVX512F-NEXT: kmovw %k0, %eax
208 ; AVX512F-NEXT: # kill: def $ax killed $ax killed $eax
209 ; AVX512F-NEXT: vzeroupper
212 ; AVX512BW-LABEL: v16i16:
214 ; AVX512BW-NEXT: vpcmpgtw %ymm1, %ymm0, %k1
215 ; AVX512BW-NEXT: vpcmpgtw %ymm3, %ymm2, %k0 {%k1}
216 ; AVX512BW-NEXT: kmovd %k0, %eax
217 ; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
218 ; AVX512BW-NEXT: vzeroupper
219 ; AVX512BW-NEXT: retq
; IR under test - compare both operand pairs, AND the masks, bitcast to i16.
220 %x0 = icmp sgt <16 x i16> %a, %b
221 %x1 = icmp sgt <16 x i16> %c, %d
222 %y = and <16 x i1> %x0, %x1
223 %res = bitcast <16 x i1> %y to i16
; v8i32_and - two signed greater-than compares of <8 x i32> vectors are ANDed
; as <8 x i1> and the resulting mask is bitcast to a scalar i8.
; The SSE checks narrow the compare results with packssdw/packsswb before
; pmovmskb, while the AVX1/AVX2 checks read the mask with a single vmovmskps.
227 define i8 @v8i32_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
228 ; SSE2-SSSE3-LABEL: v8i32_and:
229 ; SSE2-SSSE3: # %bb.0:
230 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
231 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
232 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
233 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
234 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm4
235 ; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
236 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
237 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm4
238 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
239 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
240 ; SSE2-SSSE3-NEXT: retq
242 ; AVX1-LABEL: v8i32_and:
244 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
245 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
246 ; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm4
247 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
248 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
249 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
250 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
251 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm3
252 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
253 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
254 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
255 ; AVX1-NEXT: vmovmskps %ymm0, %eax
256 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
257 ; AVX1-NEXT: vzeroupper
260 ; AVX2-LABEL: v8i32_and:
262 ; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm2
263 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
264 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
265 ; AVX2-NEXT: vmovmskps %ymm0, %eax
266 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
267 ; AVX2-NEXT: vzeroupper
270 ; AVX512F-LABEL: v8i32_and:
272 ; AVX512F-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
273 ; AVX512F-NEXT: vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
274 ; AVX512F-NEXT: kmovw %k0, %eax
275 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
276 ; AVX512F-NEXT: vzeroupper
279 ; AVX512BW-LABEL: v8i32_and:
281 ; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
282 ; AVX512BW-NEXT: vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
283 ; AVX512BW-NEXT: kmovd %k0, %eax
284 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
285 ; AVX512BW-NEXT: vzeroupper
286 ; AVX512BW-NEXT: retq
; IR under test - compare both operand pairs, AND the masks, bitcast to i8.
287 %x0 = icmp sgt <8 x i32> %a, %b
288 %x1 = icmp sgt <8 x i32> %c, %d
289 %y = and <8 x i1> %x0, %x1
290 %res = bitcast <8 x i1> %y to i8
294 ; We should see through any bitwise logic op.
; v8i32_or - same shape as the AND variant of this test, but the two
; <8 x i1> compare results are combined with OR before the bitcast to i8.
; Because OR cannot be folded into a masked compare, the AVX-512 checks show
; two independent compares joined by an explicit korw.
296 define i8 @v8i32_or(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
297 ; SSE2-SSSE3-LABEL: v8i32_or:
298 ; SSE2-SSSE3: # %bb.0:
299 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
300 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
301 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
302 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
303 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm4
304 ; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
305 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm4
306 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm4
307 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
308 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
309 ; SSE2-SSSE3-NEXT: retq
311 ; AVX1-LABEL: v8i32_or:
313 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
314 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
315 ; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm4
316 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
317 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
318 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
319 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
320 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm3
321 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
322 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
323 ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
324 ; AVX1-NEXT: vmovmskps %ymm0, %eax
325 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
326 ; AVX1-NEXT: vzeroupper
329 ; AVX2-LABEL: v8i32_or:
331 ; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm2
332 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
333 ; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
334 ; AVX2-NEXT: vmovmskps %ymm0, %eax
335 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
336 ; AVX2-NEXT: vzeroupper
339 ; AVX512F-LABEL: v8i32_or:
341 ; AVX512F-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
342 ; AVX512F-NEXT: vpcmpgtd %ymm3, %ymm2, %k1
343 ; AVX512F-NEXT: korw %k1, %k0, %k0
344 ; AVX512F-NEXT: kmovw %k0, %eax
345 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
346 ; AVX512F-NEXT: vzeroupper
349 ; AVX512BW-LABEL: v8i32_or:
351 ; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
352 ; AVX512BW-NEXT: vpcmpgtd %ymm3, %ymm2, %k1
353 ; AVX512BW-NEXT: korw %k1, %k0, %k0
354 ; AVX512BW-NEXT: kmovd %k0, %eax
355 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
356 ; AVX512BW-NEXT: vzeroupper
357 ; AVX512BW-NEXT: retq
; IR under test - compare both operand pairs, OR the masks, bitcast to i8.
358 %x0 = icmp sgt <8 x i32> %a, %b
359 %x1 = icmp sgt <8 x i32> %c, %d
360 %y = or <8 x i1> %x0, %x1
361 %res = bitcast <8 x i1> %y to i8
365 ; We should see through multiple bitwise logic ops.
; v8i32_or_and - three <8 x i32> compares (sgt, slt, eq) are combined as
; (x0 | x1) & x2 on <8 x i1> and the result is bitcast to a scalar i8.
; The slt compare is checked as a greater-than instruction whose operands are
; in the opposite order from the sgt compare. In the AVX-512 checks the final
; AND is folded into the equality compare via its k-mask.
; The SSE checks load four vector values from stack slots: two with movdqa
; and two as pcmpeqd memory operands.
367 define i8 @v8i32_or_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d, <8 x i32> %e, <8 x i32> %f) {
368 ; SSE2-SSSE3-LABEL: v8i32_or_and:
369 ; SSE2-SSSE3: # %bb.0:
370 ; SSE2-SSSE3-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
371 ; SSE2-SSSE3-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
372 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
373 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
374 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
375 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm7
376 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
377 ; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
378 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm6
379 ; SSE2-SSSE3-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm9
380 ; SSE2-SSSE3-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm8
381 ; SSE2-SSSE3-NEXT: packssdw %xmm9, %xmm8
382 ; SSE2-SSSE3-NEXT: pand %xmm6, %xmm8
383 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm8
384 ; SSE2-SSSE3-NEXT: pmovmskb %xmm8, %eax
385 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
386 ; SSE2-SSSE3-NEXT: retq
388 ; AVX1-LABEL: v8i32_or_and:
390 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
391 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm7
392 ; AVX1-NEXT: vpcmpgtd %xmm6, %xmm7, %xmm6
393 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
394 ; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm2, %ymm2
395 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
396 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
397 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm6, %xmm3
398 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
399 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
400 ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
401 ; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm1
402 ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm2
403 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
404 ; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm2
405 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
406 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
407 ; AVX1-NEXT: vmovmskps %ymm0, %eax
408 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
409 ; AVX1-NEXT: vzeroupper
412 ; AVX2-LABEL: v8i32_or_and:
414 ; AVX2-NEXT: vpcmpgtd %ymm2, %ymm3, %ymm2
415 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
416 ; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
417 ; AVX2-NEXT: vpcmpeqd %ymm5, %ymm4, %ymm1
418 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
419 ; AVX2-NEXT: vmovmskps %ymm0, %eax
420 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
421 ; AVX2-NEXT: vzeroupper
424 ; AVX512F-LABEL: v8i32_or_and:
426 ; AVX512F-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
427 ; AVX512F-NEXT: vpcmpgtd %ymm2, %ymm3, %k1
428 ; AVX512F-NEXT: korw %k1, %k0, %k1
429 ; AVX512F-NEXT: vpcmpeqd %ymm5, %ymm4, %k0 {%k1}
430 ; AVX512F-NEXT: kmovw %k0, %eax
431 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
432 ; AVX512F-NEXT: vzeroupper
435 ; AVX512BW-LABEL: v8i32_or_and:
437 ; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
438 ; AVX512BW-NEXT: vpcmpgtd %ymm2, %ymm3, %k1
439 ; AVX512BW-NEXT: korw %k1, %k0, %k1
440 ; AVX512BW-NEXT: vpcmpeqd %ymm5, %ymm4, %k0 {%k1}
441 ; AVX512BW-NEXT: kmovd %k0, %eax
442 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
443 ; AVX512BW-NEXT: vzeroupper
444 ; AVX512BW-NEXT: retq
; IR under test - three compares, OR of the first two, AND with the third,
; then bitcast of the <8 x i1> result to i8.
445 %x0 = icmp sgt <8 x i32> %a, %b
446 %x1 = icmp slt <8 x i32> %c, %d
447 %x2 = icmp eq <8 x i32> %e, %f
448 %y = or <8 x i1> %x0, %x1
449 %z = and <8 x i1> %y, %x2
450 %res = bitcast <8 x i1> %z to i8
; v8f32_and - two ordered greater-than compares of <8 x float> vectors are
; ANDed as <8 x i1> and the resulting mask is bitcast to a scalar i8.
; Every checked lowering uses a less-than compare mnemonic (cmpltps or
; vcmpltps) to implement the ogt predicate.
454 define i8 @v8f32_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
455 ; SSE2-SSSE3-LABEL: v8f32_and:
456 ; SSE2-SSSE3: # %bb.0:
457 ; SSE2-SSSE3-NEXT: cmpltps %xmm1, %xmm3
458 ; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm2
459 ; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
460 ; SSE2-SSSE3-NEXT: cmpltps %xmm5, %xmm7
461 ; SSE2-SSSE3-NEXT: cmpltps %xmm4, %xmm6
462 ; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
463 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm6
464 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm6
465 ; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %eax
466 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
467 ; SSE2-SSSE3-NEXT: retq
469 ; AVX12-LABEL: v8f32_and:
471 ; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm2
472 ; AVX12-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
473 ; AVX12-NEXT: vandps %ymm2, %ymm0, %ymm0
474 ; AVX12-NEXT: vmovmskps %ymm0, %eax
475 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
476 ; AVX12-NEXT: vzeroupper
479 ; AVX512F-LABEL: v8f32_and:
481 ; AVX512F-NEXT: vcmpltps %ymm0, %ymm1, %k1
482 ; AVX512F-NEXT: vcmpltps %ymm2, %ymm3, %k0 {%k1}
483 ; AVX512F-NEXT: kmovw %k0, %eax
484 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
485 ; AVX512F-NEXT: vzeroupper
488 ; AVX512BW-LABEL: v8f32_and:
490 ; AVX512BW-NEXT: vcmpltps %ymm0, %ymm1, %k1
491 ; AVX512BW-NEXT: vcmpltps %ymm2, %ymm3, %k0 {%k1}
492 ; AVX512BW-NEXT: kmovd %k0, %eax
493 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
494 ; AVX512BW-NEXT: vzeroupper
495 ; AVX512BW-NEXT: retq
; IR under test - compare both operand pairs, AND the masks, bitcast to i8.
496 %x0 = fcmp ogt <8 x float> %a, %b
497 %x1 = fcmp ogt <8 x float> %c, %d
498 %y = and <8 x i1> %x0, %x1
499 %res = bitcast <8 x i1> %y to i8
503 ; We should see through any bitwise logic op.
; v8f32_xor - two ordered greater-than compares of <8 x float> vectors are
; XORed as <8 x i1> and the resulting mask is bitcast to a scalar i8.
; The AVX-512 checks show two independent compares joined by an explicit
; kxorw rather than a masked second compare.
505 define i8 @v8f32_xor(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
506 ; SSE2-SSSE3-LABEL: v8f32_xor:
507 ; SSE2-SSSE3: # %bb.0:
508 ; SSE2-SSSE3-NEXT: cmpltps %xmm1, %xmm3
509 ; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm2
510 ; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
511 ; SSE2-SSSE3-NEXT: cmpltps %xmm5, %xmm7
512 ; SSE2-SSSE3-NEXT: cmpltps %xmm4, %xmm6
513 ; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
514 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm6
515 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm6
516 ; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %eax
517 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
518 ; SSE2-SSSE3-NEXT: retq
520 ; AVX12-LABEL: v8f32_xor:
522 ; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm2
523 ; AVX12-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
524 ; AVX12-NEXT: vxorps %ymm2, %ymm0, %ymm0
525 ; AVX12-NEXT: vmovmskps %ymm0, %eax
526 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
527 ; AVX12-NEXT: vzeroupper
530 ; AVX512F-LABEL: v8f32_xor:
532 ; AVX512F-NEXT: vcmpltps %ymm0, %ymm1, %k0
533 ; AVX512F-NEXT: vcmpltps %ymm2, %ymm3, %k1
534 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
535 ; AVX512F-NEXT: kmovw %k0, %eax
536 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
537 ; AVX512F-NEXT: vzeroupper
540 ; AVX512BW-LABEL: v8f32_xor:
542 ; AVX512BW-NEXT: vcmpltps %ymm0, %ymm1, %k0
543 ; AVX512BW-NEXT: vcmpltps %ymm2, %ymm3, %k1
544 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
545 ; AVX512BW-NEXT: kmovd %k0, %eax
546 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
547 ; AVX512BW-NEXT: vzeroupper
548 ; AVX512BW-NEXT: retq
; IR under test - compare both operand pairs, XOR the masks, bitcast to i8.
549 %x0 = fcmp ogt <8 x float> %a, %b
550 %x1 = fcmp ogt <8 x float> %c, %d
551 %y = xor <8 x i1> %x0, %x1
552 %res = bitcast <8 x i1> %y to i8
556 ; We should see through multiple bitwise logic ops.
; v8f32_xor_and - three <8 x float> compares (ugt, ueq, ogt) are combined as
; (x0 ^ x1) & x2 on <8 x i1> and the result is bitcast to a scalar i8.
; Predicate lowering visible in the checks -
;   ugt - cmpnleps / vcmpnleps
;   ueq - cmpeqps and cmpunordps ORed together on SSE, one vcmpeq_uqps on AVX
;   ogt - cmpltps / vcmpltps
; In the AVX-512 checks the final AND is folded into the last compare via its
; k-mask.
558 define i8 @v8f32_xor_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d, <8 x float> %e, <8 x float> %f) {
559 ; SSE2-SSSE3-LABEL: v8f32_xor_and:
560 ; SSE2-SSSE3: # %bb.0:
561 ; SSE2-SSSE3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8
562 ; SSE2-SSSE3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9
563 ; SSE2-SSSE3-NEXT: cmpnleps %xmm3, %xmm1
564 ; SSE2-SSSE3-NEXT: cmpnleps %xmm2, %xmm0
565 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
566 ; SSE2-SSSE3-NEXT: movaps %xmm5, %xmm1
567 ; SSE2-SSSE3-NEXT: cmpeqps %xmm7, %xmm1
568 ; SSE2-SSSE3-NEXT: cmpunordps %xmm7, %xmm5
569 ; SSE2-SSSE3-NEXT: orps %xmm1, %xmm5
570 ; SSE2-SSSE3-NEXT: movaps %xmm4, %xmm1
571 ; SSE2-SSSE3-NEXT: cmpeqps %xmm6, %xmm1
572 ; SSE2-SSSE3-NEXT: cmpunordps %xmm6, %xmm4
573 ; SSE2-SSSE3-NEXT: orps %xmm1, %xmm4
574 ; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
575 ; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm4
576 ; SSE2-SSSE3-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm9
577 ; SSE2-SSSE3-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm8
578 ; SSE2-SSSE3-NEXT: packssdw %xmm9, %xmm8
579 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm8
580 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm8
581 ; SSE2-SSSE3-NEXT: pmovmskb %xmm8, %eax
582 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
583 ; SSE2-SSSE3-NEXT: retq
585 ; AVX12-LABEL: v8f32_xor_and:
587 ; AVX12-NEXT: vcmpeq_uqps %ymm3, %ymm2, %ymm2
588 ; AVX12-NEXT: vcmpnleps %ymm1, %ymm0, %ymm0
589 ; AVX12-NEXT: vxorps %ymm2, %ymm0, %ymm0
590 ; AVX12-NEXT: vcmpltps %ymm4, %ymm5, %ymm1
591 ; AVX12-NEXT: vandps %ymm1, %ymm0, %ymm0
592 ; AVX12-NEXT: vmovmskps %ymm0, %eax
593 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
594 ; AVX12-NEXT: vzeroupper
597 ; AVX512F-LABEL: v8f32_xor_and:
599 ; AVX512F-NEXT: vcmpnleps %ymm1, %ymm0, %k0
600 ; AVX512F-NEXT: vcmpeq_uqps %ymm3, %ymm2, %k1
601 ; AVX512F-NEXT: kxorw %k1, %k0, %k1
602 ; AVX512F-NEXT: vcmpltps %ymm4, %ymm5, %k0 {%k1}
603 ; AVX512F-NEXT: kmovw %k0, %eax
604 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
605 ; AVX512F-NEXT: vzeroupper
608 ; AVX512BW-LABEL: v8f32_xor_and:
610 ; AVX512BW-NEXT: vcmpnleps %ymm1, %ymm0, %k0
611 ; AVX512BW-NEXT: vcmpeq_uqps %ymm3, %ymm2, %k1
612 ; AVX512BW-NEXT: kxorw %k1, %k0, %k1
613 ; AVX512BW-NEXT: vcmpltps %ymm4, %ymm5, %k0 {%k1}
614 ; AVX512BW-NEXT: kmovd %k0, %eax
615 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
616 ; AVX512BW-NEXT: vzeroupper
617 ; AVX512BW-NEXT: retq
; IR under test - three compares, XOR of the first two, AND with the third,
; then bitcast of the <8 x i1> result to i8.
618 %x0 = fcmp ugt <8 x float> %a, %b
619 %x1 = fcmp ueq <8 x float> %c, %d
620 %x2 = fcmp ogt <8 x float> %e, %f
621 %y = xor <8 x i1> %x0, %x1
622 %z = and <8 x i1> %y, %x2
623 %res = bitcast <8 x i1> %z to i8
; v32i8 - two signed greater-than compares of <32 x i8> vectors are ANDed as
; <32 x i1> and the resulting mask is bitcast to a scalar i32.
; The SSE and AVX1 checks build the 32-bit result from two pmovmskb results,
; shifting one left by 16 and ORing it with the other; the AVX2 and AVX512F
; checks use a single ymm vpmovmskb; the AVX512BW checks compare into k
; registers and move the mask out with kmovd.
627 define i32 @v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
628 ; SSE2-SSSE3-LABEL: v32i8:
629 ; SSE2-SSSE3: # %bb.0:
630 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm2, %xmm0
631 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm1
632 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm6, %xmm4
633 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
634 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm7, %xmm5
635 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5
636 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %ecx
637 ; SSE2-SSSE3-NEXT: pmovmskb %xmm5, %eax
638 ; SSE2-SSSE3-NEXT: shll $16, %eax
639 ; SSE2-SSSE3-NEXT: orl %ecx, %eax
640 ; SSE2-SSSE3-NEXT: retq
644 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
645 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
646 ; AVX1-NEXT: vpcmpgtb %xmm4, %xmm5, %xmm4
647 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
648 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1
649 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
650 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm5, %xmm1
651 ; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1
652 ; AVX1-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm2
653 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
654 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
655 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
656 ; AVX1-NEXT: shll $16, %eax
657 ; AVX1-NEXT: orl %ecx, %eax
658 ; AVX1-NEXT: vzeroupper
663 ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
664 ; AVX2-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1
665 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
666 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
667 ; AVX2-NEXT: vzeroupper
670 ; AVX512F-LABEL: v32i8:
672 ; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
673 ; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1
674 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
675 ; AVX512F-NEXT: vpmovmskb %ymm0, %eax
676 ; AVX512F-NEXT: vzeroupper
679 ; AVX512BW-LABEL: v32i8:
681 ; AVX512BW-NEXT: vpcmpgtb %ymm1, %ymm0, %k1
682 ; AVX512BW-NEXT: vpcmpgtb %ymm3, %ymm2, %k0 {%k1}
683 ; AVX512BW-NEXT: kmovd %k0, %eax
684 ; AVX512BW-NEXT: vzeroupper
685 ; AVX512BW-NEXT: retq
; IR under test - compare both operand pairs, AND the masks, bitcast to i32.
686 %x0 = icmp sgt <32 x i8> %a, %b
687 %x1 = icmp sgt <32 x i8> %c, %d
688 %y = and <32 x i1> %x0, %x1
689 %res = bitcast <32 x i1> %y to i32