1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSE2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSSE3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW
; v4i64 - two signed greater-than compares on <4 x i64> operands are ANDed
; and the resulting <4 x i1> mask is bitcast to an i4 scalar.
; The assertions below record the llc output expected for each feature set:
; the SSE runs build the 64-bit compare from 32-bit pcmpgtd/pcmpeqd pieces and
; extract the bits with movmskps, the AVX2 run expects vpcmpgtq + vpand +
; vmovmskpd, and the AVX512 runs expect the second compare to be predicated
; on the k-register written by the first one.
9 define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
10 ; SSE2-SSSE3-LABEL: v4i64:
11 ; SSE2-SSSE3: # %bb.0:
12 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,2147483648]
13 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm3
14 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm1
15 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm9
16 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm9
17 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm1
18 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
19 ; SSE2-SSSE3-NEXT: pand %xmm9, %xmm1
20 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm9[1,1,3,3]
21 ; SSE2-SSSE3-NEXT: por %xmm1, %xmm3
22 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm2
23 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm0
24 ; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm1
25 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
26 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
27 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
28 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
29 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
30 ; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
31 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
32 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm7
33 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm5
34 ; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm1
35 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm1
36 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm7, %xmm5
37 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
38 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
39 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
40 ; SSE2-SSSE3-NEXT: por %xmm2, %xmm1
41 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm6
42 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm4
43 ; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
44 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm2
45 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm4
46 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
47 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
48 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
49 ; SSE2-SSSE3-NEXT: por %xmm3, %xmm2
50 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
51 ; SSE2-SSSE3-NEXT: andps %xmm0, %xmm2
52 ; SSE2-SSSE3-NEXT: movmskps %xmm2, %eax
53 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
54 ; SSE2-SSSE3-NEXT: retq
58 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
59 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
60 ; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
61 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
62 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
63 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1
64 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
65 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1
66 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
67 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
68 ; AVX1-NEXT: vandpd %ymm1, %ymm0, %ymm0
69 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
70 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
71 ; AVX1-NEXT: vzeroupper
76 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
77 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
78 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
79 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
80 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
81 ; AVX2-NEXT: vzeroupper
84 ; AVX512F-LABEL: v4i64:
86 ; AVX512F-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
87 ; AVX512F-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
88 ; AVX512F-NEXT: kmovw %k0, %eax
89 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
90 ; AVX512F-NEXT: vzeroupper
93 ; AVX512BW-LABEL: v4i64:
95 ; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
96 ; AVX512BW-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
97 ; AVX512BW-NEXT: kmovd %k0, %eax
98 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
99 ; AVX512BW-NEXT: vzeroupper
100 ; AVX512BW-NEXT: retq
; IR under test: (%a sgt %b) & (%c sgt %d), then the 4 mask bits become an i4.
101 %x0 = icmp sgt <4 x i64> %a, %b
102 %x1 = icmp sgt <4 x i64> %c, %d
103 %y = and <4 x i1> %x0, %x1
104 %res = bitcast <4 x i1> %y to i4
; v4f64 - floating-point counterpart of the 4-lane test: two ordered
; greater-than compares on <4 x double> operands are ANDed and the <4 x i1>
; mask is bitcast to i4.
; The expected code uses less-than compare instructions (cmpltpd / vcmpltpd)
; on every feature set; the SSE and AVX runs then read the mask with
; movmskps / vmovmskpd and the AVX512 runs use a predicated k-register compare.
108 define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
109 ; SSE2-SSSE3-LABEL: v4f64:
110 ; SSE2-SSSE3: # %bb.0:
111 ; SSE2-SSSE3-NEXT: cmpltpd %xmm1, %xmm3
112 ; SSE2-SSSE3-NEXT: cmpltpd %xmm0, %xmm2
113 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
114 ; SSE2-SSSE3-NEXT: cmpltpd %xmm5, %xmm7
115 ; SSE2-SSSE3-NEXT: cmpltpd %xmm4, %xmm6
116 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
117 ; SSE2-SSSE3-NEXT: andps %xmm2, %xmm6
118 ; SSE2-SSSE3-NEXT: movmskps %xmm6, %eax
119 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
120 ; SSE2-SSSE3-NEXT: retq
122 ; AVX12-LABEL: v4f64:
124 ; AVX12-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
125 ; AVX12-NEXT: vcmpltpd %ymm2, %ymm3, %ymm1
126 ; AVX12-NEXT: vandpd %ymm1, %ymm0, %ymm0
127 ; AVX12-NEXT: vmovmskpd %ymm0, %eax
128 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
129 ; AVX12-NEXT: vzeroupper
132 ; AVX512F-LABEL: v4f64:
134 ; AVX512F-NEXT: vcmpltpd %ymm0, %ymm1, %k1
135 ; AVX512F-NEXT: vcmpltpd %ymm2, %ymm3, %k0 {%k1}
136 ; AVX512F-NEXT: kmovw %k0, %eax
137 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
138 ; AVX512F-NEXT: vzeroupper
141 ; AVX512BW-LABEL: v4f64:
143 ; AVX512BW-NEXT: vcmpltpd %ymm0, %ymm1, %k1
144 ; AVX512BW-NEXT: vcmpltpd %ymm2, %ymm3, %k0 {%k1}
145 ; AVX512BW-NEXT: kmovd %k0, %eax
146 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
147 ; AVX512BW-NEXT: vzeroupper
148 ; AVX512BW-NEXT: retq
; IR under test: (%a ogt %b) & (%c ogt %d), then the 4 mask bits become an i4.
149 %x0 = fcmp ogt <4 x double> %a, %b
150 %x1 = fcmp ogt <4 x double> %c, %d
151 %y = and <4 x i1> %x0, %x1
152 %res = bitcast <4 x i1> %y to i4
; v16i16 - two signed greater-than compares on <16 x i16> operands are ANDed
; and the <16 x i1> mask is bitcast to i16.
; Expected code: the SSE/AVX runs narrow the word-sized compare results to
; bytes with packsswb and read the mask with pmovmskb. The AVX512F run (no BW)
; compares in ymm registers, sign-extends to dwords (vpmovsxwd) and uses
; vptestmd to form the mask, whereas the AVX512BW run compares straight into
; k-registers.
156 define i16 @v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i16> %d) {
157 ; SSE2-SSSE3-LABEL: v16i16:
158 ; SSE2-SSSE3: # %bb.0:
159 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm1
160 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm2, %xmm0
161 ; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm0
162 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm7, %xmm5
163 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm6, %xmm4
164 ; SSE2-SSSE3-NEXT: packsswb %xmm5, %xmm4
165 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
166 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
167 ; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
168 ; SSE2-SSSE3-NEXT: retq
170 ; AVX1-LABEL: v16i16:
172 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm4
173 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
174 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
175 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
176 ; AVX1-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm1
177 ; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1
178 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
179 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
180 ; AVX1-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm2
181 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
182 ; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
183 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
184 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
185 ; AVX1-NEXT: vzeroupper
188 ; AVX2-LABEL: v16i16:
190 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
191 ; AVX2-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm1
192 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
193 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
194 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
195 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
196 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
197 ; AVX2-NEXT: vzeroupper
200 ; AVX512F-LABEL: v16i16:
202 ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
203 ; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm1
204 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
205 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
206 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
207 ; AVX512F-NEXT: kmovw %k0, %eax
208 ; AVX512F-NEXT: # kill: def $ax killed $ax killed $eax
209 ; AVX512F-NEXT: vzeroupper
212 ; AVX512BW-LABEL: v16i16:
214 ; AVX512BW-NEXT: vpcmpgtw %ymm1, %ymm0, %k1
215 ; AVX512BW-NEXT: vpcmpgtw %ymm3, %ymm2, %k0 {%k1}
216 ; AVX512BW-NEXT: kmovd %k0, %eax
217 ; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
218 ; AVX512BW-NEXT: vzeroupper
219 ; AVX512BW-NEXT: retq
; IR under test: (%a sgt %b) & (%c sgt %d), then the 16 mask bits become an i16.
220 %x0 = icmp sgt <16 x i16> %a, %b
221 %x1 = icmp sgt <16 x i16> %c, %d
222 %y = and <16 x i1> %x0, %x1
223 %res = bitcast <16 x i1> %y to i16
; v8i32_and - two signed greater-than compares on <8 x i32> operands are ANDed
; and the <8 x i1> mask is bitcast to i8.
; Expected code: the SSE/AVX runs narrow the dword compare results with
; packssdw then packsswb and read the mask with pmovmskb; the AVX512 runs
; predicate the second compare on the k-register written by the first.
227 define i8 @v8i32_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
228 ; SSE2-SSSE3-LABEL: v8i32_and:
229 ; SSE2-SSSE3: # %bb.0:
230 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
231 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
232 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
233 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
234 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm4
235 ; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
236 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
237 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm4
238 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
239 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
240 ; SSE2-SSSE3-NEXT: retq
242 ; AVX1-LABEL: v8i32_and:
244 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm4
245 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
246 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
247 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
248 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
249 ; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1
250 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
251 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
252 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
253 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
254 ; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0
255 ; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
256 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
257 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
258 ; AVX1-NEXT: vzeroupper
261 ; AVX2-LABEL: v8i32_and:
263 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
264 ; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm1
265 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
266 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
267 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
268 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
269 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
270 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
271 ; AVX2-NEXT: vzeroupper
274 ; AVX512F-LABEL: v8i32_and:
276 ; AVX512F-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
277 ; AVX512F-NEXT: vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
278 ; AVX512F-NEXT: kmovw %k0, %eax
279 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
280 ; AVX512F-NEXT: vzeroupper
283 ; AVX512BW-LABEL: v8i32_and:
285 ; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
286 ; AVX512BW-NEXT: vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
287 ; AVX512BW-NEXT: kmovd %k0, %eax
288 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
289 ; AVX512BW-NEXT: vzeroupper
290 ; AVX512BW-NEXT: retq
; IR under test: (%a sgt %b) & (%c sgt %d), then the 8 mask bits become an i8.
291 %x0 = icmp sgt <8 x i32> %a, %b
292 %x1 = icmp sgt <8 x i32> %c, %d
293 %y = and <8 x i1> %x0, %x1
294 %res = bitcast <8 x i1> %y to i8
298 ; We should see through any bitwise logic op.
; v8i32_or - same shape as the AND variant, but the two <8 x i1> compare
; results are combined with OR before the bitcast to i8.
; Expected code: por / vpor on the vector feature sets, and two independent
; k-register compares joined by korw on the AVX512 runs.
300 define i8 @v8i32_or(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
301 ; SSE2-SSSE3-LABEL: v8i32_or:
302 ; SSE2-SSSE3: # %bb.0:
303 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
304 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
305 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
306 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
307 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm4
308 ; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
309 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm4
310 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm4
311 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
312 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
313 ; SSE2-SSSE3-NEXT: retq
315 ; AVX1-LABEL: v8i32_or:
317 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm4
318 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
319 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
320 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
321 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
322 ; AVX1-NEXT: vpor %xmm1, %xmm4, %xmm1
323 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
324 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
325 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
326 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
327 ; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0
328 ; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
329 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
330 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
331 ; AVX1-NEXT: vzeroupper
334 ; AVX2-LABEL: v8i32_or:
336 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
337 ; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm1
338 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
339 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
340 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
341 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
342 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
343 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
344 ; AVX2-NEXT: vzeroupper
347 ; AVX512F-LABEL: v8i32_or:
349 ; AVX512F-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
350 ; AVX512F-NEXT: vpcmpgtd %ymm3, %ymm2, %k1
351 ; AVX512F-NEXT: korw %k1, %k0, %k0
352 ; AVX512F-NEXT: kmovw %k0, %eax
353 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
354 ; AVX512F-NEXT: vzeroupper
357 ; AVX512BW-LABEL: v8i32_or:
359 ; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
360 ; AVX512BW-NEXT: vpcmpgtd %ymm3, %ymm2, %k1
361 ; AVX512BW-NEXT: korw %k1, %k0, %k0
362 ; AVX512BW-NEXT: kmovd %k0, %eax
363 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
364 ; AVX512BW-NEXT: vzeroupper
365 ; AVX512BW-NEXT: retq
; IR under test: (%a sgt %b) | (%c sgt %d), then the 8 mask bits become an i8.
366 %x0 = icmp sgt <8 x i32> %a, %b
367 %x1 = icmp sgt <8 x i32> %c, %d
368 %y = or <8 x i1> %x0, %x1
369 %res = bitcast <8 x i1> %y to i8
373 ; We should see through multiple bitwise logic ops.
; v8i32_or_and - three <8 x i32> compares (sgt, slt, eq) are combined as
; (x0 | x1) & x2 and the <8 x i1> result is bitcast to i8.
; Expected code: the SSE checks include %rsp-relative loads feeding the
; pcmpeqd compares; the AVX512 runs merge the first two compares with korw
; and use that k-register to predicate the equality compare.
375 define i8 @v8i32_or_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d, <8 x i32> %e, <8 x i32> %f) {
376 ; SSE2-SSSE3-LABEL: v8i32_or_and:
377 ; SSE2-SSSE3: # %bb.0:
378 ; SSE2-SSSE3-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
379 ; SSE2-SSSE3-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
380 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
381 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
382 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
383 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm7
384 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
385 ; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
386 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm6
387 ; SSE2-SSSE3-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm9
388 ; SSE2-SSSE3-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm8
389 ; SSE2-SSSE3-NEXT: packssdw %xmm9, %xmm8
390 ; SSE2-SSSE3-NEXT: pand %xmm6, %xmm8
391 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm8
392 ; SSE2-SSSE3-NEXT: pmovmskb %xmm8, %eax
393 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
394 ; SSE2-SSSE3-NEXT: retq
396 ; AVX1-LABEL: v8i32_or_and:
398 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm6
399 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
400 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
401 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
402 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm1
403 ; AVX1-NEXT: vpor %xmm1, %xmm6, %xmm1
404 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
405 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
406 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
407 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
408 ; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0
409 ; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm1
410 ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm2
411 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
412 ; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm2
413 ; AVX1-NEXT: vpackssdw %xmm1, %xmm2, %xmm1
414 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
415 ; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
416 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
417 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
418 ; AVX1-NEXT: vzeroupper
421 ; AVX2-LABEL: v8i32_or_and:
423 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
424 ; AVX2-NEXT: vpcmpgtd %ymm2, %ymm3, %ymm1
425 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
426 ; AVX2-NEXT: vpcmpeqd %ymm5, %ymm4, %ymm1
427 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
428 ; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
429 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
430 ; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
431 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
432 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
433 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
434 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
435 ; AVX2-NEXT: vzeroupper
438 ; AVX512F-LABEL: v8i32_or_and:
440 ; AVX512F-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
441 ; AVX512F-NEXT: vpcmpgtd %ymm2, %ymm3, %k1
442 ; AVX512F-NEXT: korw %k1, %k0, %k1
443 ; AVX512F-NEXT: vpcmpeqd %ymm5, %ymm4, %k0 {%k1}
444 ; AVX512F-NEXT: kmovw %k0, %eax
445 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
446 ; AVX512F-NEXT: vzeroupper
449 ; AVX512BW-LABEL: v8i32_or_and:
451 ; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
452 ; AVX512BW-NEXT: vpcmpgtd %ymm2, %ymm3, %k1
453 ; AVX512BW-NEXT: korw %k1, %k0, %k1
454 ; AVX512BW-NEXT: vpcmpeqd %ymm5, %ymm4, %k0 {%k1}
455 ; AVX512BW-NEXT: kmovd %k0, %eax
456 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
457 ; AVX512BW-NEXT: vzeroupper
458 ; AVX512BW-NEXT: retq
; IR under test: ((%a sgt %b) | (%c slt %d)) & (%e eq %f), bitcast to i8.
459 %x0 = icmp sgt <8 x i32> %a, %b
460 %x1 = icmp slt <8 x i32> %c, %d
461 %x2 = icmp eq <8 x i32> %e, %f
462 %y = or <8 x i1> %x0, %x1
463 %z = and <8 x i1> %y, %x2
464 %res = bitcast <8 x i1> %z to i8
; v8f32_and - two ordered greater-than compares on <8 x float> operands are
; ANDed and the <8 x i1> mask is bitcast to i8.
; Expected code: less-than compare instructions (cmpltps / vcmpltps), then
; packssdw + packsswb + pmovmskb on the SSE/AVX runs, or a predicated
; k-register compare followed by kmov on the AVX512 runs.
468 define i8 @v8f32_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
469 ; SSE2-SSSE3-LABEL: v8f32_and:
470 ; SSE2-SSSE3: # %bb.0:
471 ; SSE2-SSSE3-NEXT: cmpltps %xmm1, %xmm3
472 ; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm2
473 ; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
474 ; SSE2-SSSE3-NEXT: cmpltps %xmm5, %xmm7
475 ; SSE2-SSSE3-NEXT: cmpltps %xmm4, %xmm6
476 ; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
477 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm6
478 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm6
479 ; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %eax
480 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
481 ; SSE2-SSSE3-NEXT: retq
483 ; AVX12-LABEL: v8f32_and:
485 ; AVX12-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
486 ; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm1
487 ; AVX12-NEXT: vandps %ymm1, %ymm0, %ymm0
488 ; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm1
489 ; AVX12-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
490 ; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
491 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
492 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
493 ; AVX12-NEXT: vzeroupper
496 ; AVX512F-LABEL: v8f32_and:
498 ; AVX512F-NEXT: vcmpltps %ymm0, %ymm1, %k1
499 ; AVX512F-NEXT: vcmpltps %ymm2, %ymm3, %k0 {%k1}
500 ; AVX512F-NEXT: kmovw %k0, %eax
501 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
502 ; AVX512F-NEXT: vzeroupper
505 ; AVX512BW-LABEL: v8f32_and:
507 ; AVX512BW-NEXT: vcmpltps %ymm0, %ymm1, %k1
508 ; AVX512BW-NEXT: vcmpltps %ymm2, %ymm3, %k0 {%k1}
509 ; AVX512BW-NEXT: kmovd %k0, %eax
510 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
511 ; AVX512BW-NEXT: vzeroupper
512 ; AVX512BW-NEXT: retq
; IR under test: (%a ogt %b) & (%c ogt %d), then the 8 mask bits become an i8.
513 %x0 = fcmp ogt <8 x float> %a, %b
514 %x1 = fcmp ogt <8 x float> %c, %d
515 %y = and <8 x i1> %x0, %x1
516 %res = bitcast <8 x i1> %y to i8
520 ; We should see through any bitwise logic op.
; v8f32_xor - same shape as the float AND variant, but the two <8 x i1>
; compare results are combined with XOR before the bitcast to i8.
; Expected code: pxor / vxorps on the vector feature sets, and two independent
; k-register compares joined by kxorw on the AVX512 runs.
522 define i8 @v8f32_xor(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
523 ; SSE2-SSSE3-LABEL: v8f32_xor:
524 ; SSE2-SSSE3: # %bb.0:
525 ; SSE2-SSSE3-NEXT: cmpltps %xmm1, %xmm3
526 ; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm2
527 ; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
528 ; SSE2-SSSE3-NEXT: cmpltps %xmm5, %xmm7
529 ; SSE2-SSSE3-NEXT: cmpltps %xmm4, %xmm6
530 ; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
531 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm6
532 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm6
533 ; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %eax
534 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
535 ; SSE2-SSSE3-NEXT: retq
537 ; AVX12-LABEL: v8f32_xor:
539 ; AVX12-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
540 ; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm1
541 ; AVX12-NEXT: vxorps %ymm1, %ymm0, %ymm0
542 ; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm1
543 ; AVX12-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
544 ; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
545 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
546 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
547 ; AVX12-NEXT: vzeroupper
550 ; AVX512F-LABEL: v8f32_xor:
552 ; AVX512F-NEXT: vcmpltps %ymm0, %ymm1, %k0
553 ; AVX512F-NEXT: vcmpltps %ymm2, %ymm3, %k1
554 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
555 ; AVX512F-NEXT: kmovw %k0, %eax
556 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
557 ; AVX512F-NEXT: vzeroupper
560 ; AVX512BW-LABEL: v8f32_xor:
562 ; AVX512BW-NEXT: vcmpltps %ymm0, %ymm1, %k0
563 ; AVX512BW-NEXT: vcmpltps %ymm2, %ymm3, %k1
564 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
565 ; AVX512BW-NEXT: kmovd %k0, %eax
566 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
567 ; AVX512BW-NEXT: vzeroupper
568 ; AVX512BW-NEXT: retq
; IR under test: (%a ogt %b) ^ (%c ogt %d), then the 8 mask bits become an i8.
569 %x0 = fcmp ogt <8 x float> %a, %b
570 %x1 = fcmp ogt <8 x float> %c, %d
571 %y = xor <8 x i1> %x0, %x1
572 %res = bitcast <8 x i1> %y to i8
576 ; We should see through multiple bitwise logic ops.
; v8f32_xor_and - three <8 x float> compares (ugt, ueq, ogt) are combined as
; (x0 ^ x1) & x2 and the <8 x i1> result is bitcast to i8.
; Expected code: the SSE run expects cmpnleps for the ugt compare and a
; cmpeqps + cmpunordps + orps sequence where the AVX runs expect a single
; vcmpeq_uqps; the AVX512 runs merge the first two compares with kxorw and
; use that k-register to predicate the final vcmpltps.
578 define i8 @v8f32_xor_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d, <8 x float> %e, <8 x float> %f) {
579 ; SSE2-SSSE3-LABEL: v8f32_xor_and:
580 ; SSE2-SSSE3: # %bb.0:
581 ; SSE2-SSSE3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8
582 ; SSE2-SSSE3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9
583 ; SSE2-SSSE3-NEXT: cmpnleps %xmm3, %xmm1
584 ; SSE2-SSSE3-NEXT: cmpnleps %xmm2, %xmm0
585 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
586 ; SSE2-SSSE3-NEXT: movaps %xmm5, %xmm1
587 ; SSE2-SSSE3-NEXT: cmpeqps %xmm7, %xmm1
588 ; SSE2-SSSE3-NEXT: cmpunordps %xmm7, %xmm5
589 ; SSE2-SSSE3-NEXT: orps %xmm1, %xmm5
590 ; SSE2-SSSE3-NEXT: movaps %xmm4, %xmm1
591 ; SSE2-SSSE3-NEXT: cmpeqps %xmm6, %xmm1
592 ; SSE2-SSSE3-NEXT: cmpunordps %xmm6, %xmm4
593 ; SSE2-SSSE3-NEXT: orps %xmm1, %xmm4
594 ; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
595 ; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm4
596 ; SSE2-SSSE3-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm9
597 ; SSE2-SSSE3-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm8
598 ; SSE2-SSSE3-NEXT: packssdw %xmm9, %xmm8
599 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm8
600 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm8
601 ; SSE2-SSSE3-NEXT: pmovmskb %xmm8, %eax
602 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
603 ; SSE2-SSSE3-NEXT: retq
605 ; AVX12-LABEL: v8f32_xor_and:
607 ; AVX12-NEXT: vcmpnleps %ymm1, %ymm0, %ymm0
608 ; AVX12-NEXT: vcmpeq_uqps %ymm3, %ymm2, %ymm1
609 ; AVX12-NEXT: vxorps %ymm1, %ymm0, %ymm0
610 ; AVX12-NEXT: vcmpltps %ymm4, %ymm5, %ymm1
611 ; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
612 ; AVX12-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
613 ; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
614 ; AVX12-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
615 ; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
616 ; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
617 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
618 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
619 ; AVX12-NEXT: vzeroupper
622 ; AVX512F-LABEL: v8f32_xor_and:
624 ; AVX512F-NEXT: vcmpnleps %ymm1, %ymm0, %k0
625 ; AVX512F-NEXT: vcmpeq_uqps %ymm3, %ymm2, %k1
626 ; AVX512F-NEXT: kxorw %k1, %k0, %k1
627 ; AVX512F-NEXT: vcmpltps %ymm4, %ymm5, %k0 {%k1}
628 ; AVX512F-NEXT: kmovw %k0, %eax
629 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
630 ; AVX512F-NEXT: vzeroupper
633 ; AVX512BW-LABEL: v8f32_xor_and:
635 ; AVX512BW-NEXT: vcmpnleps %ymm1, %ymm0, %k0
636 ; AVX512BW-NEXT: vcmpeq_uqps %ymm3, %ymm2, %k1
637 ; AVX512BW-NEXT: kxorw %k1, %k0, %k1
638 ; AVX512BW-NEXT: vcmpltps %ymm4, %ymm5, %k0 {%k1}
639 ; AVX512BW-NEXT: kmovd %k0, %eax
640 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
641 ; AVX512BW-NEXT: vzeroupper
642 ; AVX512BW-NEXT: retq
; IR under test: ((%a ugt %b) ^ (%c ueq %d)) & (%e ogt %f), bitcast to i8.
643 %x0 = fcmp ugt <8 x float> %a, %b
644 %x1 = fcmp ueq <8 x float> %c, %d
645 %x2 = fcmp ogt <8 x float> %e, %f
646 %y = xor <8 x i1> %x0, %x1
647 %z = and <8 x i1> %y, %x2
648 %res = bitcast <8 x i1> %z to i8
; v32i8 - two signed greater-than compares on <32 x i8> operands are ANDed
; and the <32 x i1> mask is bitcast to i32.
; Expected code: the SSE and AVX1 runs take two 16-bit pmovmskb results and
; merge them with shll $16 + orl; the AVX2 and AVX512F runs expect a single
; ymm-wide vpmovmskb; the AVX512BW run compares into k-registers and moves the
; mask out with kmovd.
652 define i32 @v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
653 ; SSE2-SSSE3-LABEL: v32i8:
654 ; SSE2-SSSE3: # %bb.0:
655 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm2, %xmm0
656 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm1
657 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm6, %xmm4
658 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
659 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm7, %xmm5
660 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5
661 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %ecx
662 ; SSE2-SSSE3-NEXT: pmovmskb %xmm5, %eax
663 ; SSE2-SSSE3-NEXT: shll $16, %eax
664 ; SSE2-SSSE3-NEXT: orl %ecx, %eax
665 ; SSE2-SSSE3-NEXT: retq
669 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
670 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
671 ; AVX1-NEXT: vpcmpgtb %xmm4, %xmm5, %xmm4
672 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
673 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1
674 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
675 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm5, %xmm1
676 ; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1
677 ; AVX1-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm2
678 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
679 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
680 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
681 ; AVX1-NEXT: shll $16, %eax
682 ; AVX1-NEXT: orl %ecx, %eax
683 ; AVX1-NEXT: vzeroupper
688 ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
689 ; AVX2-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1
690 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
691 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
692 ; AVX2-NEXT: vzeroupper
695 ; AVX512F-LABEL: v32i8:
697 ; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
698 ; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1
699 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
700 ; AVX512F-NEXT: vpmovmskb %ymm0, %eax
701 ; AVX512F-NEXT: vzeroupper
704 ; AVX512BW-LABEL: v32i8:
706 ; AVX512BW-NEXT: vpcmpgtb %ymm1, %ymm0, %k1
707 ; AVX512BW-NEXT: vpcmpgtb %ymm3, %ymm2, %k0 {%k1}
708 ; AVX512BW-NEXT: kmovd %k0, %eax
709 ; AVX512BW-NEXT: vzeroupper
710 ; AVX512BW-NEXT: retq
; IR under test: (%a sgt %b) & (%c sgt %d), then the 32 mask bits become an i32.
711 %x0 = icmp sgt <32 x i8> %a, %b
712 %x1 = icmp sgt <32 x i8> %c, %d
713 %y = and <32 x i1> %x0, %x1
714 %res = bitcast <32 x i1> %y to i32