1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW
; v8i64 - bitwise AND of two signed greater-than compares on <8 x i64>, with
; the resulting <8 x i1> mask reinterpreted as a scalar i8.
;   %x0  = icmp sgt %a, %b
;   %x1  = icmp sgt %c, %d
;   %y   = and %x0, %x1
;   %res = bitcast of the <8 x i1> value %y to i8
; The check lines that follow are autogenerated (see the NOTE on the first
; line of this file); regenerate them with the script rather than editing
; them by hand.
; In the AVX512F and AVX512BW expectations there is no separate mask AND;
; the second vpcmpgtq is issued under the %k1 mask produced by the first.
8 define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
11 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
12 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
13 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
14 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
15 ; SSE-NEXT: pcmpgtq %xmm7, %xmm3
16 ; SSE-NEXT: pcmpgtq %xmm6, %xmm2
17 ; SSE-NEXT: packssdw %xmm3, %xmm2
18 ; SSE-NEXT: pcmpgtq %xmm5, %xmm1
19 ; SSE-NEXT: pcmpgtq %xmm4, %xmm0
20 ; SSE-NEXT: packssdw %xmm1, %xmm0
21 ; SSE-NEXT: packssdw %xmm2, %xmm0
22 ; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm11
23 ; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm10
24 ; SSE-NEXT: packssdw %xmm11, %xmm10
25 ; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm9
26 ; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm8
27 ; SSE-NEXT: packssdw %xmm9, %xmm8
28 ; SSE-NEXT: packssdw %xmm10, %xmm8
29 ; SSE-NEXT: pand %xmm0, %xmm8
30 ; SSE-NEXT: packsswb %xmm8, %xmm8
31 ; SSE-NEXT: pmovmskb %xmm8, %eax
32 ; SSE-NEXT: # kill: def $al killed $al killed $eax
37 ; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm8
38 ; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm9
39 ; AVX1-NEXT: vpcmpgtq %xmm8, %xmm9, %xmm8
40 ; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
41 ; AVX1-NEXT: vpackssdw %xmm8, %xmm5, %xmm5
42 ; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm7
43 ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm8
44 ; AVX1-NEXT: vpcmpgtq %xmm7, %xmm8, %xmm7
45 ; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm4
46 ; AVX1-NEXT: vpackssdw %xmm7, %xmm4, %xmm4
47 ; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm4, %ymm4
48 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5
49 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
50 ; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
51 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
52 ; AVX1-NEXT: vpackssdw %xmm5, %xmm1, %xmm1
53 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
54 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
55 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm5, %xmm3
56 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
57 ; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
58 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
59 ; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0
60 ; AVX1-NEXT: vmovmskps %ymm0, %eax
61 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
62 ; AVX1-NEXT: vzeroupper
67 ; AVX2-NEXT: vpcmpgtq %ymm7, %ymm5, %ymm5
68 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm1, %ymm1
69 ; AVX2-NEXT: vpand %ymm5, %ymm1, %ymm1
70 ; AVX2-NEXT: vpcmpgtq %ymm6, %ymm4, %ymm3
71 ; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
72 ; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
73 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
74 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
75 ; AVX2-NEXT: vmovmskps %ymm0, %eax
76 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
77 ; AVX2-NEXT: vzeroupper
80 ; AVX512F-LABEL: v8i64:
82 ; AVX512F-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
83 ; AVX512F-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
84 ; AVX512F-NEXT: kmovw %k0, %eax
85 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
86 ; AVX512F-NEXT: vzeroupper
89 ; AVX512BW-LABEL: v8i64:
91 ; AVX512BW-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
92 ; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
93 ; AVX512BW-NEXT: kmovd %k0, %eax
94 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
95 ; AVX512BW-NEXT: vzeroupper
97 %x0 = icmp sgt <8 x i64> %a, %b
98 %x1 = icmp sgt <8 x i64> %c, %d
99 %y = and <8 x i1> %x0, %x1
100 %res = bitcast <8 x i1> %y to i8
; v8f64 - bitwise AND of two ordered greater-than compares on <8 x double>,
; with the resulting <8 x i1> mask reinterpreted as a scalar i8.
;   %x0  = fcmp ogt %a, %b
;   %x1  = fcmp ogt %c, %d
;   %y   = and %x0, %x1
;   %res = bitcast of the <8 x i1> value %y to i8
; The check lines that follow are autogenerated (see the NOTE on the first
; line of this file); regenerate them with the script rather than editing
; them by hand.
; Every expectation uses the less-than form of the compare (cmpltpd or
; vcmpltpd), presumably with the operands swapped to express greater-than.
; The AVX512F and AVX512BW expectations issue the second compare under the
; %k1 mask produced by the first, with no separate mask AND.
104 define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
107 ; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8
108 ; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm9
109 ; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm10
110 ; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm11
111 ; SSE-NEXT: cmpltpd %xmm3, %xmm7
112 ; SSE-NEXT: cmpltpd %xmm2, %xmm6
113 ; SSE-NEXT: packssdw %xmm7, %xmm6
114 ; SSE-NEXT: cmpltpd %xmm1, %xmm5
115 ; SSE-NEXT: cmpltpd %xmm0, %xmm4
116 ; SSE-NEXT: packssdw %xmm5, %xmm4
117 ; SSE-NEXT: packssdw %xmm6, %xmm4
118 ; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm11
119 ; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm10
120 ; SSE-NEXT: packssdw %xmm11, %xmm10
121 ; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm9
122 ; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm8
123 ; SSE-NEXT: packssdw %xmm9, %xmm8
124 ; SSE-NEXT: packssdw %xmm10, %xmm8
125 ; SSE-NEXT: pand %xmm4, %xmm8
126 ; SSE-NEXT: packsswb %xmm8, %xmm8
127 ; SSE-NEXT: pmovmskb %xmm8, %eax
128 ; SSE-NEXT: # kill: def $al killed $al killed $eax
133 ; AVX1-NEXT: vcmpltpd %ymm5, %ymm7, %ymm5
134 ; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm7
135 ; AVX1-NEXT: vpackssdw %xmm7, %xmm5, %xmm5
136 ; AVX1-NEXT: vcmpltpd %ymm4, %ymm6, %ymm4
137 ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm6
138 ; AVX1-NEXT: vpackssdw %xmm6, %xmm4, %xmm4
139 ; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm4, %ymm4
140 ; AVX1-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1
141 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
142 ; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
143 ; AVX1-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0
144 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
145 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
146 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
147 ; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0
148 ; AVX1-NEXT: vmovmskps %ymm0, %eax
149 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
150 ; AVX1-NEXT: vzeroupper
155 ; AVX2-NEXT: vcmpltpd %ymm5, %ymm7, %ymm5
156 ; AVX2-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1
157 ; AVX2-NEXT: vandpd %ymm5, %ymm1, %ymm1
158 ; AVX2-NEXT: vcmpltpd %ymm4, %ymm6, %ymm3
159 ; AVX2-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0
160 ; AVX2-NEXT: vandpd %ymm3, %ymm0, %ymm0
161 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
162 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
163 ; AVX2-NEXT: vmovmskps %ymm0, %eax
164 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
165 ; AVX2-NEXT: vzeroupper
168 ; AVX512F-LABEL: v8f64:
170 ; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1
171 ; AVX512F-NEXT: vcmpltpd %zmm2, %zmm3, %k0 {%k1}
172 ; AVX512F-NEXT: kmovw %k0, %eax
173 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
174 ; AVX512F-NEXT: vzeroupper
177 ; AVX512BW-LABEL: v8f64:
179 ; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1
180 ; AVX512BW-NEXT: vcmpltpd %zmm2, %zmm3, %k0 {%k1}
181 ; AVX512BW-NEXT: kmovd %k0, %eax
182 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
183 ; AVX512BW-NEXT: vzeroupper
184 ; AVX512BW-NEXT: retq
185 %x0 = fcmp ogt <8 x double> %a, %b
186 %x1 = fcmp ogt <8 x double> %c, %d
187 %y = and <8 x i1> %x0, %x1
188 %res = bitcast <8 x i1> %y to i8
; v32i16 - bitwise AND of two signed greater-than compares on <32 x i16>,
; with the resulting <32 x i1> mask reinterpreted as a scalar i32.
;   %x0  = icmp sgt %a, %b
;   %x1  = icmp sgt %c, %d
;   %y   = and %x0, %x1
;   %res = bitcast of the <32 x i1> value %y to i32
; The check lines that follow are autogenerated (see the NOTE on the first
; line of this file); regenerate them with the script rather than editing
; them by hand.
; The AVX512F expectation (no AVX512BW) compares in two ymm halves, widens
; each half with vpmovsxwd, tests it with vptestmd, and joins the two 16-bit
; masks with shll $16 and orl. The AVX512BW expectation instead uses two zmm
; vpcmpgtw instructions, the second one under the %k1 mask from the first.
192 define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) {
195 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
196 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
197 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
198 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
199 ; SSE-NEXT: pcmpgtw %xmm4, %xmm0
200 ; SSE-NEXT: pcmpgtw %xmm5, %xmm1
201 ; SSE-NEXT: pcmpgtw %xmm6, %xmm2
202 ; SSE-NEXT: pcmpgtw %xmm7, %xmm3
203 ; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm11
204 ; SSE-NEXT: pand %xmm0, %xmm11
205 ; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm10
206 ; SSE-NEXT: pand %xmm1, %xmm10
207 ; SSE-NEXT: packsswb %xmm10, %xmm11
208 ; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm9
209 ; SSE-NEXT: pand %xmm2, %xmm9
210 ; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm8
211 ; SSE-NEXT: pand %xmm3, %xmm8
212 ; SSE-NEXT: packsswb %xmm8, %xmm9
213 ; SSE-NEXT: pmovmskb %xmm11, %ecx
214 ; SSE-NEXT: pmovmskb %xmm9, %eax
215 ; SSE-NEXT: shll $16, %eax
216 ; SSE-NEXT: orl %ecx, %eax
219 ; AVX1-LABEL: v32i16:
221 ; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm8
222 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
223 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
224 ; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1
225 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm0, %xmm3
226 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
227 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
228 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm0, %xmm0
229 ; AVX1-NEXT: vpcmpgtw %xmm7, %xmm5, %xmm2
230 ; AVX1-NEXT: vpand %xmm2, %xmm8, %xmm2
231 ; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7
232 ; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm5
233 ; AVX1-NEXT: vpcmpgtw %xmm7, %xmm5, %xmm5
234 ; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1
235 ; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
236 ; AVX1-NEXT: vpcmpgtw %xmm6, %xmm4, %xmm2
237 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
238 ; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm3
239 ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm4
240 ; AVX1-NEXT: vpcmpgtw %xmm3, %xmm4, %xmm3
241 ; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
242 ; AVX1-NEXT: vpacksswb %xmm0, %xmm2, %xmm0
243 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
244 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
245 ; AVX1-NEXT: shll $16, %eax
246 ; AVX1-NEXT: orl %ecx, %eax
247 ; AVX1-NEXT: vzeroupper
250 ; AVX2-LABEL: v32i16:
252 ; AVX2-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
253 ; AVX2-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
254 ; AVX2-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
255 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
256 ; AVX2-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm2
257 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
258 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
259 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
260 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
261 ; AVX2-NEXT: vzeroupper
264 ; AVX512F-LABEL: v32i16:
266 ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm4
267 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
268 ; AVX512F-NEXT: vpcmpgtw %ymm4, %ymm5, %ymm4
269 ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
270 ; AVX512F-NEXT: vextracti64x4 $1, %zmm3, %ymm1
271 ; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm5
272 ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm5, %ymm1
273 ; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm1
274 ; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
275 ; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
276 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
277 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
278 ; AVX512F-NEXT: kmovw %k0, %ecx
279 ; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm0
280 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
281 ; AVX512F-NEXT: kmovw %k0, %eax
282 ; AVX512F-NEXT: shll $16, %eax
283 ; AVX512F-NEXT: orl %ecx, %eax
284 ; AVX512F-NEXT: vzeroupper
287 ; AVX512BW-LABEL: v32i16:
289 ; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k1
290 ; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm2, %k0 {%k1}
291 ; AVX512BW-NEXT: kmovd %k0, %eax
292 ; AVX512BW-NEXT: vzeroupper
293 ; AVX512BW-NEXT: retq
294 %x0 = icmp sgt <32 x i16> %a, %b
295 %x1 = icmp sgt <32 x i16> %c, %d
296 %y = and <32 x i1> %x0, %x1
297 %res = bitcast <32 x i1> %y to i32
; v16i32 - bitwise AND of two signed greater-than compares on <16 x i32>,
; with the resulting <16 x i1> mask reinterpreted as a scalar i16.
;   %x0  = icmp sgt %a, %b
;   %x1  = icmp sgt %c, %d
;   %y   = and %x0, %x1
;   %res = bitcast of the <16 x i1> value %y to i16
; The check lines that follow are autogenerated (see the NOTE on the first
; line of this file); regenerate them with the script rather than editing
; them by hand.
; The SSE, AVX1 and AVX2 expectations narrow the compare results with
; pack instructions and read the mask with pmovmskb/vpmovmskb. The AVX512F
; and AVX512BW expectations issue the second vpcmpgtd under the %k1 mask
; produced by the first and move the resulting k-register to %eax.
301 define i16 @v16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i32> %d) {
304 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
305 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
306 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
307 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
308 ; SSE-NEXT: pcmpgtd %xmm4, %xmm0
309 ; SSE-NEXT: pcmpgtd %xmm5, %xmm1
310 ; SSE-NEXT: pcmpgtd %xmm6, %xmm2
311 ; SSE-NEXT: pcmpgtd %xmm7, %xmm3
312 ; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm11
313 ; SSE-NEXT: pand %xmm0, %xmm11
314 ; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm10
315 ; SSE-NEXT: pand %xmm1, %xmm10
316 ; SSE-NEXT: packssdw %xmm10, %xmm11
317 ; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm9
318 ; SSE-NEXT: pand %xmm2, %xmm9
319 ; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm8
320 ; SSE-NEXT: pand %xmm3, %xmm8
321 ; SSE-NEXT: packssdw %xmm8, %xmm9
322 ; SSE-NEXT: packsswb %xmm9, %xmm11
323 ; SSE-NEXT: pmovmskb %xmm11, %eax
324 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
327 ; AVX1-LABEL: v16i32:
329 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm8
330 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
331 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
332 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
333 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm2
334 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
335 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
336 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
337 ; AVX1-NEXT: vpcmpgtd %xmm6, %xmm4, %xmm3
338 ; AVX1-NEXT: vpand %xmm3, %xmm8, %xmm3
339 ; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm6
340 ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm4
341 ; AVX1-NEXT: vpcmpgtd %xmm6, %xmm4, %xmm4
342 ; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
343 ; AVX1-NEXT: vpackssdw %xmm0, %xmm3, %xmm0
344 ; AVX1-NEXT: vpcmpgtd %xmm7, %xmm5, %xmm3
345 ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
346 ; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm3
347 ; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm4
348 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm3
349 ; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
350 ; AVX1-NEXT: vpackssdw %xmm1, %xmm2, %xmm1
351 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
352 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
353 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
354 ; AVX1-NEXT: vzeroupper
357 ; AVX2-LABEL: v16i32:
359 ; AVX2-NEXT: vpcmpgtd %ymm3, %ymm1, %ymm1
360 ; AVX2-NEXT: vpcmpgtd %ymm2, %ymm0, %ymm0
361 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
362 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
363 ; AVX2-NEXT: vpcmpgtd %ymm7, %ymm5, %ymm2
364 ; AVX2-NEXT: vpcmpgtd %ymm6, %ymm4, %ymm3
365 ; AVX2-NEXT: vpackssdw %ymm2, %ymm3, %ymm2
366 ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
367 ; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
368 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
369 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
370 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
371 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
372 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
373 ; AVX2-NEXT: vzeroupper
376 ; AVX512F-LABEL: v16i32:
378 ; AVX512F-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
379 ; AVX512F-NEXT: vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
380 ; AVX512F-NEXT: kmovw %k0, %eax
381 ; AVX512F-NEXT: # kill: def $ax killed $ax killed $eax
382 ; AVX512F-NEXT: vzeroupper
385 ; AVX512BW-LABEL: v16i32:
387 ; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
388 ; AVX512BW-NEXT: vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
389 ; AVX512BW-NEXT: kmovd %k0, %eax
390 ; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
391 ; AVX512BW-NEXT: vzeroupper
392 ; AVX512BW-NEXT: retq
393 %x0 = icmp sgt <16 x i32> %a, %b
394 %x1 = icmp sgt <16 x i32> %c, %d
395 %y = and <16 x i1> %x0, %x1
396 %res = bitcast <16 x i1> %y to i16
; v16f32 - bitwise AND of two ordered greater-than compares on <16 x float>,
; with the resulting <16 x i1> mask reinterpreted as a scalar i16.
;   %x0  = fcmp ogt %a, %b
;   %x1  = fcmp ogt %c, %d
;   %y   = and %x0, %x1
;   %res = bitcast of the <16 x i1> value %y to i16
; The check lines that follow are autogenerated (see the NOTE on the first
; line of this file); regenerate them with the script rather than editing
; them by hand.
; Every expectation uses the less-than form of the compare (cmpltps or
; vcmpltps), presumably with the operands swapped to express greater-than.
; The AVX512F and AVX512BW expectations issue the second compare under the
; %k1 mask produced by the first, with no separate mask AND.
400 define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
403 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8
404 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9
405 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10
406 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm11
407 ; SSE-NEXT: cmpltps %xmm0, %xmm4
408 ; SSE-NEXT: cmpltps %xmm1, %xmm5
409 ; SSE-NEXT: cmpltps %xmm2, %xmm6
410 ; SSE-NEXT: cmpltps %xmm3, %xmm7
411 ; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm11
412 ; SSE-NEXT: andps %xmm4, %xmm11
413 ; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm10
414 ; SSE-NEXT: andps %xmm5, %xmm10
415 ; SSE-NEXT: packssdw %xmm10, %xmm11
416 ; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm9
417 ; SSE-NEXT: andps %xmm6, %xmm9
418 ; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm8
419 ; SSE-NEXT: andps %xmm7, %xmm8
420 ; SSE-NEXT: packssdw %xmm8, %xmm9
421 ; SSE-NEXT: packsswb %xmm9, %xmm11
422 ; SSE-NEXT: pmovmskb %xmm11, %eax
423 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
426 ; AVX1-LABEL: v16f32:
428 ; AVX1-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
429 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
430 ; AVX1-NEXT: vcmpltps %ymm1, %ymm3, %ymm1
431 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
432 ; AVX1-NEXT: vcmpltps %ymm4, %ymm6, %ymm4
433 ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm6
434 ; AVX1-NEXT: vpand %xmm6, %xmm2, %xmm2
435 ; AVX1-NEXT: vcmpltps %ymm5, %ymm7, %ymm5
436 ; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
437 ; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
438 ; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1
439 ; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
440 ; AVX1-NEXT: vandps %xmm4, %xmm0, %xmm0
441 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
442 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
443 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
444 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
445 ; AVX1-NEXT: vzeroupper
448 ; AVX2-LABEL: v16f32:
450 ; AVX2-NEXT: vcmpltps %ymm1, %ymm3, %ymm1
451 ; AVX2-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
452 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
453 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
454 ; AVX2-NEXT: vcmpltps %ymm5, %ymm7, %ymm2
455 ; AVX2-NEXT: vcmpltps %ymm4, %ymm6, %ymm3
456 ; AVX2-NEXT: vpackssdw %ymm2, %ymm3, %ymm2
457 ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
458 ; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
459 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
460 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
461 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
462 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
463 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
464 ; AVX2-NEXT: vzeroupper
467 ; AVX512F-LABEL: v16f32:
469 ; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1
470 ; AVX512F-NEXT: vcmpltps %zmm2, %zmm3, %k0 {%k1}
471 ; AVX512F-NEXT: kmovw %k0, %eax
472 ; AVX512F-NEXT: # kill: def $ax killed $ax killed $eax
473 ; AVX512F-NEXT: vzeroupper
476 ; AVX512BW-LABEL: v16f32:
478 ; AVX512BW-NEXT: vcmpltps %zmm0, %zmm1, %k1
479 ; AVX512BW-NEXT: vcmpltps %zmm2, %zmm3, %k0 {%k1}
480 ; AVX512BW-NEXT: kmovd %k0, %eax
481 ; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
482 ; AVX512BW-NEXT: vzeroupper
483 ; AVX512BW-NEXT: retq
484 %x0 = fcmp ogt <16 x float> %a, %b
485 %x1 = fcmp ogt <16 x float> %c, %d
486 %y = and <16 x i1> %x0, %x1
487 %res = bitcast <16 x i1> %y to i16
; v64i8 - bitwise AND of two signed greater-than compares on <64 x i8>, with
; the resulting <64 x i1> mask reinterpreted as a scalar i64.
;   %x0  = icmp sgt %a, %b
;   %x1  = icmp sgt %c, %d
;   %y   = and %x0, %x1
;   %res = bitcast of the <64 x i1> value %y to i64
; The check lines that follow are autogenerated (see the NOTE on the first
; line of this file); regenerate them with the script rather than editing
; them by hand.
; The SSE and AVX1 expectations read four 16-bit masks with
; pmovmskb/vpmovmskb and join them using shll $16, shlq $32 and or. The AVX2
; and AVX512F expectations read two 32-bit masks with vpmovmskb on ymm
; registers and join them with shlq $32 and orq. The AVX512BW expectation
; uses two zmm vpcmpgtb instructions, the second under the %k1 mask from the
; first, followed by kmovq to %rax.
491 define i64 @v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
494 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
495 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
496 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
497 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
498 ; SSE-NEXT: pcmpgtb %xmm4, %xmm0
499 ; SSE-NEXT: pcmpgtb %xmm5, %xmm1
500 ; SSE-NEXT: pcmpgtb %xmm6, %xmm2
501 ; SSE-NEXT: pcmpgtb %xmm7, %xmm3
502 ; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm11
503 ; SSE-NEXT: pand %xmm0, %xmm11
504 ; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm10
505 ; SSE-NEXT: pand %xmm1, %xmm10
506 ; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm9
507 ; SSE-NEXT: pand %xmm2, %xmm9
508 ; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm8
509 ; SSE-NEXT: pand %xmm3, %xmm8
510 ; SSE-NEXT: pmovmskb %xmm11, %eax
511 ; SSE-NEXT: pmovmskb %xmm10, %ecx
512 ; SSE-NEXT: shll $16, %ecx
513 ; SSE-NEXT: orl %eax, %ecx
514 ; SSE-NEXT: pmovmskb %xmm9, %edx
515 ; SSE-NEXT: pmovmskb %xmm8, %eax
516 ; SSE-NEXT: shll $16, %eax
517 ; SSE-NEXT: orl %edx, %eax
518 ; SSE-NEXT: shlq $32, %rax
519 ; SSE-NEXT: orq %rcx, %rax
524 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
525 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
526 ; AVX1-NEXT: vpcmpgtb %xmm8, %xmm9, %xmm8
527 ; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1
528 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
529 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm9
530 ; AVX1-NEXT: vpcmpgtb %xmm3, %xmm9, %xmm3
531 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
532 ; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm2
533 ; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm9
534 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm9, %xmm2
535 ; AVX1-NEXT: vpand %xmm2, %xmm8, %xmm2
536 ; AVX1-NEXT: vpcmpgtb %xmm7, %xmm5, %xmm5
537 ; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1
538 ; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm5
539 ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm7
540 ; AVX1-NEXT: vpcmpgtb %xmm5, %xmm7, %xmm5
541 ; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
542 ; AVX1-NEXT: vpcmpgtb %xmm6, %xmm4, %xmm4
543 ; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
544 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
545 ; AVX1-NEXT: vpmovmskb %xmm3, %ecx
546 ; AVX1-NEXT: shll $16, %ecx
547 ; AVX1-NEXT: orl %eax, %ecx
548 ; AVX1-NEXT: vpmovmskb %xmm1, %edx
549 ; AVX1-NEXT: vpmovmskb %xmm2, %eax
550 ; AVX1-NEXT: shll $16, %eax
551 ; AVX1-NEXT: orl %edx, %eax
552 ; AVX1-NEXT: shlq $32, %rax
553 ; AVX1-NEXT: orq %rcx, %rax
554 ; AVX1-NEXT: vzeroupper
559 ; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
560 ; AVX2-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
561 ; AVX2-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm2
562 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
563 ; AVX2-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm2
564 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
565 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
566 ; AVX2-NEXT: vpmovmskb %ymm1, %eax
567 ; AVX2-NEXT: shlq $32, %rax
568 ; AVX2-NEXT: orq %rcx, %rax
569 ; AVX2-NEXT: vzeroupper
572 ; AVX512F-LABEL: v64i8:
574 ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm4
575 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
576 ; AVX512F-NEXT: vpcmpgtb %ymm4, %ymm5, %ymm4
577 ; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
578 ; AVX512F-NEXT: vextracti64x4 $1, %zmm3, %ymm1
579 ; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm5
580 ; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm5, %ymm1
581 ; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm1
582 ; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm2
583 ; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
584 ; AVX512F-NEXT: vpmovmskb %ymm0, %ecx
585 ; AVX512F-NEXT: vpmovmskb %ymm1, %eax
586 ; AVX512F-NEXT: shlq $32, %rax
587 ; AVX512F-NEXT: orq %rcx, %rax
588 ; AVX512F-NEXT: vzeroupper
591 ; AVX512BW-LABEL: v64i8:
593 ; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k1
594 ; AVX512BW-NEXT: vpcmpgtb %zmm3, %zmm2, %k0 {%k1}
595 ; AVX512BW-NEXT: kmovq %k0, %rax
596 ; AVX512BW-NEXT: vzeroupper
597 ; AVX512BW-NEXT: retq
598 %x0 = icmp sgt <64 x i8> %a, %b
599 %x1 = icmp sgt <64 x i8> %c, %d
600 %y = and <64 x i1> %x0, %x1
601 %res = bitcast <64 x i1> %y to i64
604 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: