1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE42
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
7 ; Lower common integer comparisons such as 'isPositive' efficiently:
8 ; https://llvm.org/bugs/show_bug.cgi?id=26701
define <16 x i8> @test_pcmpgtb(<16 x i8> %x) {
; SSE-LABEL: test_pcmpgtb:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtb:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <16 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <16 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <16 x i8> %not
}
define <8 x i16> @test_pcmpgtw(<8 x i16> %x) {
; SSE-LABEL: test_pcmpgtw:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtw:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <8 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <8 x i16> %not
}
define <4 x i32> @test_pcmpgtd(<4 x i32> %x) {
; SSE-LABEL: test_pcmpgtd:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtd:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  %not = xor <4 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %not
}
define <2 x i64> @test_pcmpgtq(<2 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_pcmpgtq:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <2 x i64> %x, <i64 63, i64 63>
  %not = xor <2 x i64> %sign, <i64 -1, i64 -1>
  ret <2 x i64> %not
}
define <1 x i128> @test_strange_type(<1 x i128> %x) {
; CHECK-LABEL: test_strange_type:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    notq %rax
; CHECK-NEXT:    movq %rax, %rdx
; CHECK-NEXT:    retq
  %sign = ashr <1 x i128> %x, <i128 127>
  %not = xor <1 x i128> %sign, <i128 -1>
  ret <1 x i128> %not
}
define <32 x i8> @test_pcmpgtb_256(<32 x i8> %x) {
; SSE-LABEL: test_pcmpgtb_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    pcmpgtb %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtb_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtb_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sign = ashr <32 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <32 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <32 x i8> %not
}
define <16 x i16> @test_pcmpgtw_256(<16 x i16> %x) {
; SSE-LABEL: test_pcmpgtw_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE-NEXT:    pcmpgtw %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtw_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtw_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sign = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <16 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <16 x i16> %not
}
define <8 x i32> @test_pcmpgtd_256(<8 x i32> %x) {
; SSE-LABEL: test_pcmpgtd_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtd_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtd_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sign = ashr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %not = xor <8 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  ret <8 x i32> %not
}
define <4 x i64> @test_pcmpgtq_256(<4 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq_256:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_pcmpgtq_256:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtq_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtq_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sign = ashr <4 x i64> %x, <i64 63, i64 63, i64 63, i64 63>
  %not = xor <4 x i64> %sign, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %not
}
define <16 x i8> @cmpeq_zext_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: cmpeq_zext_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_zext_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp eq <16 x i8> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %zext
}
define <16 x i16> @cmpeq_zext_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: cmpeq_zext_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
; SSE-NEXT:    psrlw $15, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpeq_zext_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpeq_zext_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cmp = icmp eq <16 x i16> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i16>
  ret <16 x i16> %zext
}
define <4 x i32> @cmpeq_zext_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: cmpeq_zext_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_zext_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp eq <4 x i32> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %zext
}
define <4 x i64> @cmpeq_zext_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: cmpeq_zext_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [1,1]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpeq_zext_v4i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqq %xmm2, %xmm0
; SSE42-NEXT:    psrlq $63, %xmm0
; SSE42-NEXT:    pcmpeqq %xmm3, %xmm1
; SSE42-NEXT:    psrlq $63, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpeq_zext_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpeq_zext_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cmp = icmp eq <4 x i64> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i64>
  ret <4 x i64> %zext
}
define <32 x i8> @cmpgt_zext_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: cmpgt_zext_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pcmpgtb %xmm3, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpgt_zext_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpgt_zext_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cmp = icmp sgt <32 x i8> %a, %b
  %zext = zext <32 x i1> %cmp to <32 x i8>
  ret <32 x i8> %zext
}
define <8 x i16> @cmpgt_zext_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: cmpgt_zext_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpgt_zext_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <8 x i16> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %zext
}
define <8 x i32> @cmpgt_zext_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: cmpgt_zext_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpgt_zext_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpgt_zext_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cmp = icmp sgt <8 x i32> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %zext
}
define <2 x i64> @cmpgt_zext_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: cmpgt_zext_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpgt_zext_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    psrlq $63, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: cmpgt_zext_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <2 x i64> %a, %b
  %zext = zext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %zext
}
442 ; Test that we optimize a zext of a vector setcc ne zero where all bits but the
443 ; lsb are known to be zero.
define <8 x i32> @cmpne_knownzeros_zext_v8i16_v8i32(<8 x i16> %x) {
; SSE2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $15, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    psrlw $15, %xmm0
; SSE42-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE42-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %a = lshr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %b = icmp ne <8 x i16> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i32>
  ret <8 x i32> %c
}
define <8 x i32> @cmpne_knownzeros_zext_v8i32_v8i32(<8 x i32> %x) {
; SSE-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %b = icmp ne <8 x i32> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i32>
  ret <8 x i32> %c
}
define <8 x i16> @cmpne_knownzeros_zext_v8i32_v8i16(<8 x i32> %x) {
; SSE2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrld $31, %xmm1
; SSE2-NEXT:    psrld $31, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    psrld $31, %xmm1
; SSE42-NEXT:    psrld $31, %xmm0
; SSE42-NEXT:    packusdw %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %b = icmp ne <8 x i32> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i16>
  ret <8 x i16> %c
}