; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2

; Lower common integer comparisons such as 'isPositive' efficiently:
; https://llvm.org/bugs/show_bug.cgi?id=26701
; NOT(x >>s 7) == (x > -1), so this should lower to pcmpgtb vs. all-ones.
define <16 x i8> @test_pcmpgtb(<16 x i8> %x) {
; SSE-LABEL: test_pcmpgtb:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtb:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <16 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <16 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <16 x i8> %not
}
; NOT(x >>s 15) == (x > -1), so this should lower to pcmpgtw vs. all-ones.
define <8 x i16> @test_pcmpgtw(<8 x i16> %x) {
; SSE-LABEL: test_pcmpgtw:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtw:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <8 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <8 x i16> %not
}
; NOT(x >>s 31) == (x > -1), so this should lower to pcmpgtd vs. all-ones.
define <4 x i32> @test_pcmpgtd(<4 x i32> %x) {
; SSE-LABEL: test_pcmpgtd:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtd:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  %not = xor <4 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %not
}
; i64 elements: pcmpgtq needs SSE4.2; plain SSE2 falls back to a
; shift/shuffle/xor sequence of the sign bits.
define <2 x i64> @test_pcmpgtq(<2 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_pcmpgtq:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <2 x i64> %x, <i64 63, i64 63>
  %not = xor <2 x i64> %sign, <i64 -1, i64 -1>
  ret <2 x i64> %not
}
; Odd type: a <1 x i128> is lowered on GPRs (result comes back in rax:rdx).
define <1 x i128> @test_strange_type(<1 x i128> %x) {
; CHECK-LABEL: test_strange_type:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    notq %rax
; CHECK-NEXT:    movq %rax, %rdx
; CHECK-NEXT:    retq
  %sign = ashr <1 x i128> %x, <i128 127>
  %not = xor <1 x i128> %sign, <i128 -1>
  ret <1 x i128> %not
}
; 256-bit version: SSE splits into two 128-bit compares; AVX1 has no 256-bit
; integer compare, so it works on the 128-bit halves.
define <32 x i8> @test_pcmpgtb_256(<32 x i8> %x) {
; SSE-LABEL: test_pcmpgtb_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    pcmpgtb %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtb_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtb_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sign = ashr <32 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <32 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <32 x i8> %not
}
define <16 x i16> @test_pcmpgtw_256(<16 x i16> %x) {
; SSE-LABEL: test_pcmpgtw_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE-NEXT:    pcmpgtw %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtw_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtw_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sign = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <16 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <16 x i16> %not
}
define <8 x i32> @test_pcmpgtd_256(<8 x i32> %x) {
; SSE-LABEL: test_pcmpgtd_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtd_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtd_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sign = ashr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %not = xor <8 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  ret <8 x i32> %not
}
define <4 x i64> @test_pcmpgtq_256(<4 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq_256:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_pcmpgtq_256:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtq_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtq_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sign = ashr <4 x i64> %x, <i64 63, i64 63, i64 63, i64 63>
  %not = xor <4 x i64> %sign, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %not
}
; Zext of a vector compare: the all-ones mask is reduced to 0/1 per lane.
define <16 x i8> @cmpeq_zext_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: cmpeq_zext_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_zext_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp eq <16 x i8> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %zext
}
define <16 x i16> @cmpeq_zext_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: cmpeq_zext_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
; SSE-NEXT:    psrlw $15, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpeq_zext_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpeq_zext_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cmp = icmp eq <16 x i16> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i16>
  ret <16 x i16> %zext
}
define <4 x i32> @cmpeq_zext_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: cmpeq_zext_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_zext_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp eq <4 x i32> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %zext
}
define <4 x i64> @cmpeq_zext_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: cmpeq_zext_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [1,1]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpeq_zext_v4i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqq %xmm2, %xmm0
; SSE42-NEXT:    psrlq $63, %xmm0
; SSE42-NEXT:    pcmpeqq %xmm3, %xmm1
; SSE42-NEXT:    psrlq $63, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpeq_zext_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpeq_zext_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cmp = icmp eq <4 x i64> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i64>
  ret <4 x i64> %zext
}
define <32 x i8> @cmpgt_zext_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: cmpgt_zext_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pcmpgtb %xmm3, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpgt_zext_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpgt_zext_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cmp = icmp sgt <32 x i8> %a, %b
  %zext = zext <32 x i1> %cmp to <32 x i8>
  ret <32 x i8> %zext
}
define <8 x i16> @cmpgt_zext_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: cmpgt_zext_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpgt_zext_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <8 x i16> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %zext
}
define <8 x i32> @cmpgt_zext_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: cmpgt_zext_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpgt_zext_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpgt_zext_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cmp = icmp sgt <8 x i32> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %zext
}
define <2 x i64> @cmpgt_zext_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: cmpgt_zext_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpgt_zext_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    psrlq $63, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: cmpgt_zext_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <2 x i64> %a, %b
  %zext = zext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %zext
}
; Test that we optimize a zext of a vector setcc ne zero where all bits but the
; lsb are known to be zero.
define <8 x i32> @cmpne_knownzeros_zext_v8i16_v8i32(<8 x i16> %x) {
; SSE2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $15, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    psrlw $15, %xmm0
; SSE42-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE42-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %a = lshr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %b = icmp ne <8 x i16> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i32>
  ret <8 x i32> %c
}
define <8 x i32> @cmpne_knownzeros_zext_v8i32_v8i32(<8 x i32> %x) {
; SSE-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %b = icmp ne <8 x i32> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i32>
  ret <8 x i32> %c
}
define <8 x i16> @cmpne_knownzeros_zext_v8i32_v8i16(<8 x i32> %x) {
; SSE2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrld $31, %xmm1
; SSE2-NEXT:    psrld $31, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    psrld $31, %xmm1
; SSE42-NEXT:    psrld $31, %xmm0
; SSE42-NEXT:    packusdw %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %b = icmp ne <8 x i32> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i16>
  ret <8 x i16> %c
}
546 define <4 x i32> @cmpeq_one_mask_bit(<4 x i32> %mask) {
547 ; SSE-LABEL: cmpeq_one_mask_bit:
549 ; SSE-NEXT: psrad $31, %xmm0
552 ; AVX-LABEL: cmpeq_one_mask_bit:
554 ; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
556 %mask_signbit = and <4 x i32> %mask, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
557 %mask_bool = icmp ne <4 x i32> %mask_signbit, zeroinitializer
558 %mask_bool_ext = sext <4 x i1> %mask_bool to <4 x i32>
559 ret <4 x i32> %mask_bool_ext