1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSE2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSSE3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW
; v4i64 - signed greater-than on two pairs of <4 x i64> operands (%a > %b and
; %c > %d), the two <4 x i1> results and-ed together and bitcast to an i4.
; The SSE2/SSSE3 expectations xor every operand with a [2147483648,2147483648]
; constant and assemble the 64-bit compare from pcmpgtd/pcmpeqd plus shuffles,
; then read the bits with movmskps.
; The AVX-512 expectations fold the and into the second compare by predicating
; it on the first compare's mask ({%k1}) and read the mask with kmovw/kmovd.
9 define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
10 ; SSE2-SSSE3-LABEL: v4i64:
11 ; SSE2-SSSE3: # %bb.0:
12 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,2147483648]
13 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm3
14 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm1
15 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm9
16 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm9
17 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm1
18 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
19 ; SSE2-SSSE3-NEXT: pand %xmm9, %xmm1
20 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm9[1,1,3,3]
21 ; SSE2-SSSE3-NEXT: por %xmm1, %xmm3
22 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm2
23 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm0
24 ; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm1
25 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
26 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
27 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
28 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
29 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
30 ; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
31 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
32 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm7
33 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm5
34 ; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm1
35 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm1
36 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm7, %xmm5
37 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
38 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
39 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
40 ; SSE2-SSSE3-NEXT: por %xmm2, %xmm1
41 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm6
42 ; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm4
43 ; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
44 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm2
45 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm4
46 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
47 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
48 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
49 ; SSE2-SSSE3-NEXT: por %xmm3, %xmm2
50 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
51 ; SSE2-SSSE3-NEXT: andps %xmm0, %xmm2
52 ; SSE2-SSSE3-NEXT: movmskps %xmm2, %eax
53 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
54 ; SSE2-SSSE3-NEXT: retq
58 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm4
59 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
60 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
61 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
62 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
63 ; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1
64 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
65 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
66 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
67 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
68 ; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0
69 ; AVX1-NEXT: vmovmskps %xmm0, %eax
70 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
71 ; AVX1-NEXT: vzeroupper
76 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
77 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
78 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
79 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
80 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
81 ; AVX2-NEXT: vmovmskps %xmm0, %eax
82 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
83 ; AVX2-NEXT: vzeroupper
86 ; AVX512F-LABEL: v4i64:
88 ; AVX512F-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
89 ; AVX512F-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
90 ; AVX512F-NEXT: kmovw %k0, %eax
91 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
92 ; AVX512F-NEXT: vzeroupper
95 ; AVX512BW-LABEL: v4i64:
97 ; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
98 ; AVX512BW-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
99 ; AVX512BW-NEXT: kmovd %k0, %eax
100 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
101 ; AVX512BW-NEXT: vzeroupper
102 ; AVX512BW-NEXT: retq
103 %x0 = icmp sgt <4 x i64> %a, %b
104 %x1 = icmp sgt <4 x i64> %c, %d
105 %y = and <4 x i1> %x0, %x1
106 %res = bitcast <4 x i1> %y to i4
; v4f64 - ordered greater-than (fcmp ogt) on two pairs of <4 x double>
; operands, the two <4 x i1> results and-ed together and bitcast to an i4.
; Every expectation emits the compare as a less-than (cmpltpd / vcmpltpd),
; presumably with the operands swapped relative to the IR.
; The AVX-512 expectations predicate the second compare on the first compare's
; mask ({%k1}) instead of issuing a separate and.
110 define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
111 ; SSE2-SSSE3-LABEL: v4f64:
112 ; SSE2-SSSE3: # %bb.0:
113 ; SSE2-SSSE3-NEXT: cmpltpd %xmm1, %xmm3
114 ; SSE2-SSSE3-NEXT: cmpltpd %xmm0, %xmm2
115 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
116 ; SSE2-SSSE3-NEXT: cmpltpd %xmm5, %xmm7
117 ; SSE2-SSSE3-NEXT: cmpltpd %xmm4, %xmm6
118 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
119 ; SSE2-SSSE3-NEXT: andps %xmm2, %xmm6
120 ; SSE2-SSSE3-NEXT: movmskps %xmm6, %eax
121 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
122 ; SSE2-SSSE3-NEXT: retq
124 ; AVX12-LABEL: v4f64:
126 ; AVX12-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
127 ; AVX12-NEXT: vcmpltpd %ymm2, %ymm3, %ymm1
128 ; AVX12-NEXT: vandpd %ymm1, %ymm0, %ymm0
129 ; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm1
130 ; AVX12-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
131 ; AVX12-NEXT: vmovmskps %xmm0, %eax
132 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
133 ; AVX12-NEXT: vzeroupper
136 ; AVX512F-LABEL: v4f64:
138 ; AVX512F-NEXT: vcmpltpd %ymm0, %ymm1, %k1
139 ; AVX512F-NEXT: vcmpltpd %ymm2, %ymm3, %k0 {%k1}
140 ; AVX512F-NEXT: kmovw %k0, %eax
141 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
142 ; AVX512F-NEXT: vzeroupper
145 ; AVX512BW-LABEL: v4f64:
147 ; AVX512BW-NEXT: vcmpltpd %ymm0, %ymm1, %k1
148 ; AVX512BW-NEXT: vcmpltpd %ymm2, %ymm3, %k0 {%k1}
149 ; AVX512BW-NEXT: kmovd %k0, %eax
150 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
151 ; AVX512BW-NEXT: vzeroupper
152 ; AVX512BW-NEXT: retq
153 %x0 = fcmp ogt <4 x double> %a, %b
154 %x1 = fcmp ogt <4 x double> %c, %d
155 %y = and <4 x i1> %x0, %x1
156 %res = bitcast <4 x i1> %y to i4
; v16i16 - signed greater-than on two pairs of <16 x i16> operands, the two
; <16 x i1> results and-ed together and bitcast to an i16.
; The SSE and AVX1/AVX2 expectations narrow the word masks to bytes
; (packsswb / vpacksswb) and read them with pmovmskb / vpmovmskb.
; With avx512f+avx512vl only, the expectations compare in ymm registers,
; sign-extend to zmm (vpmovsxwd) and use vptestmd to form the mask; with
; avx512bw added, the compares write mask registers directly.
160 define i16 @v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i16> %d) {
161 ; SSE2-SSSE3-LABEL: v16i16:
162 ; SSE2-SSSE3: # %bb.0:
163 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm1
164 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm2, %xmm0
165 ; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm0
166 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm7, %xmm5
167 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm6, %xmm4
168 ; SSE2-SSSE3-NEXT: packsswb %xmm5, %xmm4
169 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
170 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
171 ; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
172 ; SSE2-SSSE3-NEXT: retq
174 ; AVX1-LABEL: v16i16:
176 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm4
177 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
178 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
179 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
180 ; AVX1-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm1
181 ; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1
182 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
183 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
184 ; AVX1-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm2
185 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
186 ; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
187 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
188 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
189 ; AVX1-NEXT: vzeroupper
192 ; AVX2-LABEL: v16i16:
194 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
195 ; AVX2-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm1
196 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
197 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
198 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
199 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
200 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
201 ; AVX2-NEXT: vzeroupper
204 ; AVX512F-LABEL: v16i16:
206 ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
207 ; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm1
208 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
209 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
210 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
211 ; AVX512F-NEXT: kmovw %k0, %eax
212 ; AVX512F-NEXT: # kill: def $ax killed $ax killed $eax
213 ; AVX512F-NEXT: vzeroupper
216 ; AVX512BW-LABEL: v16i16:
218 ; AVX512BW-NEXT: vpcmpgtw %ymm1, %ymm0, %k1
219 ; AVX512BW-NEXT: vpcmpgtw %ymm3, %ymm2, %k0 {%k1}
220 ; AVX512BW-NEXT: kmovd %k0, %eax
221 ; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
222 ; AVX512BW-NEXT: vzeroupper
223 ; AVX512BW-NEXT: retq
224 %x0 = icmp sgt <16 x i16> %a, %b
225 %x1 = icmp sgt <16 x i16> %c, %d
226 %y = and <16 x i1> %x0, %x1
227 %res = bitcast <16 x i1> %y to i16
; v8i32_and - signed greater-than on two pairs of <8 x i32> operands, the two
; <8 x i1> results and-ed together and bitcast to an i8.
; The SSE and AVX1/AVX2 expectations narrow the dword masks in two steps
; (packssdw, then packsswb) before reading them with pmovmskb / vpmovmskb.
; The AVX-512 expectations predicate the second compare on the first compare's
; mask ({%k1}) instead of issuing a separate and.
231 define i8 @v8i32_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
232 ; SSE2-SSSE3-LABEL: v8i32_and:
233 ; SSE2-SSSE3: # %bb.0:
234 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
235 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
236 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
237 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
238 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm4
239 ; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
240 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
241 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm4
242 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
243 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
244 ; SSE2-SSSE3-NEXT: retq
246 ; AVX1-LABEL: v8i32_and:
248 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm4
249 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
250 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
251 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
252 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
253 ; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1
254 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
255 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
256 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
257 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
258 ; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0
259 ; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
260 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
261 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
262 ; AVX1-NEXT: vzeroupper
265 ; AVX2-LABEL: v8i32_and:
267 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
268 ; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm1
269 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
270 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
271 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
272 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
273 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
274 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
275 ; AVX2-NEXT: vzeroupper
278 ; AVX512F-LABEL: v8i32_and:
280 ; AVX512F-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
281 ; AVX512F-NEXT: vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
282 ; AVX512F-NEXT: kmovw %k0, %eax
283 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
284 ; AVX512F-NEXT: vzeroupper
287 ; AVX512BW-LABEL: v8i32_and:
289 ; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
290 ; AVX512BW-NEXT: vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
291 ; AVX512BW-NEXT: kmovd %k0, %eax
292 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
293 ; AVX512BW-NEXT: vzeroupper
294 ; AVX512BW-NEXT: retq
295 %x0 = icmp sgt <8 x i32> %a, %b
296 %x1 = icmp sgt <8 x i32> %c, %d
297 %y = and <8 x i1> %x0, %x1
298 %res = bitcast <8 x i1> %y to i8
302 ; We should see through any bitwise logic op.
; v8i32_or - signed greater-than on two pairs of <8 x i32> operands, the two
; <8 x i1> results or-ed together and bitcast to an i8.
; The vector expectations match the and variant with por / vpor as the
; combining instruction. The AVX-512 expectations keep two independent
; compares and merge their masks with korw, since an or cannot be expressed
; as a predicated compare.
304 define i8 @v8i32_or(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
305 ; SSE2-SSSE3-LABEL: v8i32_or:
306 ; SSE2-SSSE3: # %bb.0:
307 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
308 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
309 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
310 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
311 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm4
312 ; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
313 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm4
314 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm4
315 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
316 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
317 ; SSE2-SSSE3-NEXT: retq
319 ; AVX1-LABEL: v8i32_or:
321 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm4
322 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
323 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
324 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
325 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
326 ; AVX1-NEXT: vpor %xmm1, %xmm4, %xmm1
327 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
328 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
329 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
330 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
331 ; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0
332 ; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
333 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
334 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
335 ; AVX1-NEXT: vzeroupper
338 ; AVX2-LABEL: v8i32_or:
340 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
341 ; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm1
342 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
343 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
344 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
345 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
346 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
347 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
348 ; AVX2-NEXT: vzeroupper
351 ; AVX512F-LABEL: v8i32_or:
353 ; AVX512F-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
354 ; AVX512F-NEXT: vpcmpgtd %ymm3, %ymm2, %k1
355 ; AVX512F-NEXT: korw %k1, %k0, %k0
356 ; AVX512F-NEXT: kmovw %k0, %eax
357 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
358 ; AVX512F-NEXT: vzeroupper
361 ; AVX512BW-LABEL: v8i32_or:
363 ; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
364 ; AVX512BW-NEXT: vpcmpgtd %ymm3, %ymm2, %k1
365 ; AVX512BW-NEXT: korw %k1, %k0, %k0
366 ; AVX512BW-NEXT: kmovd %k0, %eax
367 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
368 ; AVX512BW-NEXT: vzeroupper
369 ; AVX512BW-NEXT: retq
370 %x0 = icmp sgt <8 x i32> %a, %b
371 %x1 = icmp sgt <8 x i32> %c, %d
372 %y = or <8 x i1> %x0, %x1
373 %res = bitcast <8 x i1> %y to i8
377 ; We should see through multiple bitwise logic ops.
; v8i32_or_and - three <8 x i32> compares chained through two logic ops:
; ((%a sgt %b) or (%c slt %d)) and (%e eq %f), bitcast from <8 x i1> to i8.
; The signed less-than is emitted as a greater-than with its operands swapped
; (see the pcmpgtd / vpcmpgtd operand order for the second compare).
; In the SSE expectations the equality compare reads its operands from the
; stack ({{[0-9]+}}(%rsp)).
; The AVX-512 expectations merge the first two masks with korw and then
; predicate the equality compare on that merged mask ({%k1}).
379 define i8 @v8i32_or_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d, <8 x i32> %e, <8 x i32> %f) {
380 ; SSE2-SSSE3-LABEL: v8i32_or_and:
381 ; SSE2-SSSE3: # %bb.0:
382 ; SSE2-SSSE3-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
383 ; SSE2-SSSE3-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
384 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
385 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
386 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
387 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm7
388 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
389 ; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
390 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm6
391 ; SSE2-SSSE3-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm9
392 ; SSE2-SSSE3-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm8
393 ; SSE2-SSSE3-NEXT: packssdw %xmm9, %xmm8
394 ; SSE2-SSSE3-NEXT: pand %xmm6, %xmm8
395 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm8
396 ; SSE2-SSSE3-NEXT: pmovmskb %xmm8, %eax
397 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
398 ; SSE2-SSSE3-NEXT: retq
400 ; AVX1-LABEL: v8i32_or_and:
402 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm6
403 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
404 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
405 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
406 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm1
407 ; AVX1-NEXT: vpor %xmm1, %xmm6, %xmm1
408 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
409 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
410 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
411 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
412 ; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0
413 ; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm1
414 ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm2
415 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
416 ; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm2
417 ; AVX1-NEXT: vpackssdw %xmm1, %xmm2, %xmm1
418 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
419 ; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
420 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
421 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
422 ; AVX1-NEXT: vzeroupper
425 ; AVX2-LABEL: v8i32_or_and:
427 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
428 ; AVX2-NEXT: vpcmpgtd %ymm2, %ymm3, %ymm1
429 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
430 ; AVX2-NEXT: vpcmpeqd %ymm5, %ymm4, %ymm1
431 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
432 ; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
433 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
434 ; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
435 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
436 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
437 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
438 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
439 ; AVX2-NEXT: vzeroupper
442 ; AVX512F-LABEL: v8i32_or_and:
444 ; AVX512F-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
445 ; AVX512F-NEXT: vpcmpgtd %ymm2, %ymm3, %k1
446 ; AVX512F-NEXT: korw %k1, %k0, %k1
447 ; AVX512F-NEXT: vpcmpeqd %ymm5, %ymm4, %k0 {%k1}
448 ; AVX512F-NEXT: kmovw %k0, %eax
449 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
450 ; AVX512F-NEXT: vzeroupper
453 ; AVX512BW-LABEL: v8i32_or_and:
455 ; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
456 ; AVX512BW-NEXT: vpcmpgtd %ymm2, %ymm3, %k1
457 ; AVX512BW-NEXT: korw %k1, %k0, %k1
458 ; AVX512BW-NEXT: vpcmpeqd %ymm5, %ymm4, %k0 {%k1}
459 ; AVX512BW-NEXT: kmovd %k0, %eax
460 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
461 ; AVX512BW-NEXT: vzeroupper
462 ; AVX512BW-NEXT: retq
463 %x0 = icmp sgt <8 x i32> %a, %b
464 %x1 = icmp slt <8 x i32> %c, %d
465 %x2 = icmp eq <8 x i32> %e, %f
466 %y = or <8 x i1> %x0, %x1
467 %z = and <8 x i1> %y, %x2
468 %res = bitcast <8 x i1> %z to i8
; v8f32_and - ordered greater-than (fcmp ogt) on two pairs of <8 x float>
; operands, the two <8 x i1> results and-ed together and bitcast to an i8.
; Every expectation emits the compare as a less-than (cmpltps / vcmpltps),
; presumably with the operands swapped relative to the IR.
; The AVX-512 expectations predicate the second compare on the first compare's
; mask ({%k1}) instead of issuing a separate and.
472 define i8 @v8f32_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
473 ; SSE2-SSSE3-LABEL: v8f32_and:
474 ; SSE2-SSSE3: # %bb.0:
475 ; SSE2-SSSE3-NEXT: cmpltps %xmm1, %xmm3
476 ; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm2
477 ; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
478 ; SSE2-SSSE3-NEXT: cmpltps %xmm5, %xmm7
479 ; SSE2-SSSE3-NEXT: cmpltps %xmm4, %xmm6
480 ; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
481 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm6
482 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm6
483 ; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %eax
484 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
485 ; SSE2-SSSE3-NEXT: retq
487 ; AVX12-LABEL: v8f32_and:
489 ; AVX12-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
490 ; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm1
491 ; AVX12-NEXT: vandps %ymm1, %ymm0, %ymm0
492 ; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm1
493 ; AVX12-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
494 ; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
495 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
496 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
497 ; AVX12-NEXT: vzeroupper
500 ; AVX512F-LABEL: v8f32_and:
502 ; AVX512F-NEXT: vcmpltps %ymm0, %ymm1, %k1
503 ; AVX512F-NEXT: vcmpltps %ymm2, %ymm3, %k0 {%k1}
504 ; AVX512F-NEXT: kmovw %k0, %eax
505 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
506 ; AVX512F-NEXT: vzeroupper
509 ; AVX512BW-LABEL: v8f32_and:
511 ; AVX512BW-NEXT: vcmpltps %ymm0, %ymm1, %k1
512 ; AVX512BW-NEXT: vcmpltps %ymm2, %ymm3, %k0 {%k1}
513 ; AVX512BW-NEXT: kmovd %k0, %eax
514 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
515 ; AVX512BW-NEXT: vzeroupper
516 ; AVX512BW-NEXT: retq
517 %x0 = fcmp ogt <8 x float> %a, %b
518 %x1 = fcmp ogt <8 x float> %c, %d
519 %y = and <8 x i1> %x0, %x1
520 %res = bitcast <8 x i1> %y to i8
524 ; We should see through any bitwise logic op.
; v8f32_xor - ordered greater-than (fcmp ogt) on two pairs of <8 x float>
; operands, the two <8 x i1> results xor-ed together and bitcast to an i8.
; The vector expectations match the and variant with pxor / vxorps as the
; combining instruction. The AVX-512 expectations keep two independent
; compares and merge their masks with kxorw.
526 define i8 @v8f32_xor(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
527 ; SSE2-SSSE3-LABEL: v8f32_xor:
528 ; SSE2-SSSE3: # %bb.0:
529 ; SSE2-SSSE3-NEXT: cmpltps %xmm1, %xmm3
530 ; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm2
531 ; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
532 ; SSE2-SSSE3-NEXT: cmpltps %xmm5, %xmm7
533 ; SSE2-SSSE3-NEXT: cmpltps %xmm4, %xmm6
534 ; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
535 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm6
536 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm6
537 ; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %eax
538 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
539 ; SSE2-SSSE3-NEXT: retq
541 ; AVX12-LABEL: v8f32_xor:
543 ; AVX12-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
544 ; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm1
545 ; AVX12-NEXT: vxorps %ymm1, %ymm0, %ymm0
546 ; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm1
547 ; AVX12-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
548 ; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
549 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
550 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
551 ; AVX12-NEXT: vzeroupper
554 ; AVX512F-LABEL: v8f32_xor:
556 ; AVX512F-NEXT: vcmpltps %ymm0, %ymm1, %k0
557 ; AVX512F-NEXT: vcmpltps %ymm2, %ymm3, %k1
558 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
559 ; AVX512F-NEXT: kmovw %k0, %eax
560 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
561 ; AVX512F-NEXT: vzeroupper
564 ; AVX512BW-LABEL: v8f32_xor:
566 ; AVX512BW-NEXT: vcmpltps %ymm0, %ymm1, %k0
567 ; AVX512BW-NEXT: vcmpltps %ymm2, %ymm3, %k1
568 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
569 ; AVX512BW-NEXT: kmovd %k0, %eax
570 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
571 ; AVX512BW-NEXT: vzeroupper
572 ; AVX512BW-NEXT: retq
573 %x0 = fcmp ogt <8 x float> %a, %b
574 %x1 = fcmp ogt <8 x float> %c, %d
575 %y = xor <8 x i1> %x0, %x1
576 %res = bitcast <8 x i1> %y to i8
580 ; We should see through multiple bitwise logic ops.
; v8f32_xor_and - three <8 x float> compares chained through two logic ops:
; ((%a ugt %b) xor (%c ueq %d)) and (%e ogt %f), bitcast from <8 x i1> to i8.
; The unordered greater-than is emitted as cmpnleps / vcmpnleps.
; The SSE expectations build the unordered-equal result from
; cmpeqps or-ed with cmpunordps; the AVX and AVX-512 expectations use a single
; vcmpeq_uqps instead.
; In the SSE expectations the final ordered compare reads its operands from
; the stack ({{[0-9]+}}(%rsp)).
; The AVX-512 expectations merge the first two masks with kxorw and then
; predicate the last compare on that merged mask ({%k1}).
582 define i8 @v8f32_xor_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d, <8 x float> %e, <8 x float> %f) {
583 ; SSE2-SSSE3-LABEL: v8f32_xor_and:
584 ; SSE2-SSSE3: # %bb.0:
585 ; SSE2-SSSE3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8
586 ; SSE2-SSSE3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9
587 ; SSE2-SSSE3-NEXT: cmpnleps %xmm3, %xmm1
588 ; SSE2-SSSE3-NEXT: cmpnleps %xmm2, %xmm0
589 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
590 ; SSE2-SSSE3-NEXT: movaps %xmm5, %xmm1
591 ; SSE2-SSSE3-NEXT: cmpeqps %xmm7, %xmm1
592 ; SSE2-SSSE3-NEXT: cmpunordps %xmm7, %xmm5
593 ; SSE2-SSSE3-NEXT: orps %xmm1, %xmm5
594 ; SSE2-SSSE3-NEXT: movaps %xmm4, %xmm1
595 ; SSE2-SSSE3-NEXT: cmpeqps %xmm6, %xmm1
596 ; SSE2-SSSE3-NEXT: cmpunordps %xmm6, %xmm4
597 ; SSE2-SSSE3-NEXT: orps %xmm1, %xmm4
598 ; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
599 ; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm4
600 ; SSE2-SSSE3-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm9
601 ; SSE2-SSSE3-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm8
602 ; SSE2-SSSE3-NEXT: packssdw %xmm9, %xmm8
603 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm8
604 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm8
605 ; SSE2-SSSE3-NEXT: pmovmskb %xmm8, %eax
606 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
607 ; SSE2-SSSE3-NEXT: retq
609 ; AVX12-LABEL: v8f32_xor_and:
611 ; AVX12-NEXT: vcmpnleps %ymm1, %ymm0, %ymm0
612 ; AVX12-NEXT: vcmpeq_uqps %ymm3, %ymm2, %ymm1
613 ; AVX12-NEXT: vxorps %ymm1, %ymm0, %ymm0
614 ; AVX12-NEXT: vcmpltps %ymm4, %ymm5, %ymm1
615 ; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
616 ; AVX12-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
617 ; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
618 ; AVX12-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
619 ; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
620 ; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
621 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
622 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
623 ; AVX12-NEXT: vzeroupper
626 ; AVX512F-LABEL: v8f32_xor_and:
628 ; AVX512F-NEXT: vcmpnleps %ymm1, %ymm0, %k0
629 ; AVX512F-NEXT: vcmpeq_uqps %ymm3, %ymm2, %k1
630 ; AVX512F-NEXT: kxorw %k1, %k0, %k1
631 ; AVX512F-NEXT: vcmpltps %ymm4, %ymm5, %k0 {%k1}
632 ; AVX512F-NEXT: kmovw %k0, %eax
633 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
634 ; AVX512F-NEXT: vzeroupper
637 ; AVX512BW-LABEL: v8f32_xor_and:
639 ; AVX512BW-NEXT: vcmpnleps %ymm1, %ymm0, %k0
640 ; AVX512BW-NEXT: vcmpeq_uqps %ymm3, %ymm2, %k1
641 ; AVX512BW-NEXT: kxorw %k1, %k0, %k1
642 ; AVX512BW-NEXT: vcmpltps %ymm4, %ymm5, %k0 {%k1}
643 ; AVX512BW-NEXT: kmovd %k0, %eax
644 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
645 ; AVX512BW-NEXT: vzeroupper
646 ; AVX512BW-NEXT: retq
647 %x0 = fcmp ugt <8 x float> %a, %b
648 %x1 = fcmp ueq <8 x float> %c, %d
649 %x2 = fcmp ogt <8 x float> %e, %f
650 %y = xor <8 x i1> %x0, %x1
651 %z = and <8 x i1> %y, %x2
652 %res = bitcast <8 x i1> %z to i8
656 define i32 @v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
657 ; SSE2-SSSE3-LABEL: v32i8:
658 ; SSE2-SSSE3: # %bb.0:
659 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm2, %xmm0
660 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm1
661 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm6, %xmm4
662 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
663 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm7, %xmm5
664 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5
665 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %ecx
666 ; SSE2-SSSE3-NEXT: pmovmskb %xmm5, %eax
667 ; SSE2-SSSE3-NEXT: shll $16, %eax
668 ; SSE2-SSSE3-NEXT: orl %ecx, %eax
669 ; SSE2-SSSE3-NEXT: retq
673 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
674 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
675 ; AVX1-NEXT: vpcmpgtb %xmm4, %xmm5, %xmm4
676 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
677 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1
678 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
679 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm5, %xmm1
680 ; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1
681 ; AVX1-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm2
682 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
683 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
684 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
685 ; AVX1-NEXT: shll $16, %eax
686 ; AVX1-NEXT: orl %ecx, %eax
687 ; AVX1-NEXT: vzeroupper
692 ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
693 ; AVX2-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1
694 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
695 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
696 ; AVX2-NEXT: vzeroupper
699 ; AVX512F-LABEL: v32i8:
701 ; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
702 ; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1
703 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
704 ; AVX512F-NEXT: vpmovmskb %ymm0, %eax
705 ; AVX512F-NEXT: vzeroupper
708 ; AVX512BW-LABEL: v32i8:
710 ; AVX512BW-NEXT: vpcmpgtb %ymm1, %ymm0, %k1
711 ; AVX512BW-NEXT: vpcmpgtb %ymm3, %ymm2, %k0 {%k1}
712 ; AVX512BW-NEXT: kmovd %k0, %eax
713 ; AVX512BW-NEXT: vzeroupper
714 ; AVX512BW-NEXT: retq
715 %x0 = icmp sgt <32 x i8> %a, %b
716 %x1 = icmp sgt <32 x i8> %c, %d
717 %y = and <32 x i1> %x0, %x1
718 %res = bitcast <32 x i1> %y to i32