1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE42
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
7 ; Lower common integer comparisons such as 'isPositive' efficiently:
8 ; https://llvm.org/bugs/show_bug.cgi?id=26701
; isPositive(v16i8): (x >>s 7) ^ -1 is "x > -1"; expect a single pcmpgtb
; against an all-ones register (materialized via pcmpeqd), per the PR note above.
10 define <16 x i8> @test_pcmpgtb(<16 x i8> %x) {
11 ; SSE-LABEL: test_pcmpgtb:
13 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1
14 ; SSE-NEXT: pcmpgtb %xmm1, %xmm0
17 ; AVX-LABEL: test_pcmpgtb:
19 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
20 ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
22 %sign = ashr <16 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
23 %not = xor <16 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; Same isPositive pattern at i16 granularity: expect a single pcmpgtw vs. all-ones.
27 define <8 x i16> @test_pcmpgtw(<8 x i16> %x) {
28 ; SSE-LABEL: test_pcmpgtw:
30 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1
31 ; SSE-NEXT: pcmpgtw %xmm1, %xmm0
34 ; AVX-LABEL: test_pcmpgtw:
36 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
37 ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
39 %sign = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
40 %not = xor <8 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
; Same isPositive pattern at i32 granularity: expect a single pcmpgtd vs. all-ones.
44 define <4 x i32> @test_pcmpgtd(<4 x i32> %x) {
45 ; SSE-LABEL: test_pcmpgtd:
47 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1
48 ; SSE-NEXT: pcmpgtd %xmm1, %xmm0
51 ; AVX-LABEL: test_pcmpgtd:
53 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
54 ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
56 %sign = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
57 %not = xor <4 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1>
; v2i64: SSE2 has no 64-bit pcmpgt, so the sign dwords are splatted within each
; qword (pshufd [1,1,3,3]) and compared with pcmpgtd; SSE4.2/AVX use pcmpgtq directly.
61 define <2 x i64> @test_pcmpgtq(<2 x i64> %x) {
62 ; SSE2-LABEL: test_pcmpgtq:
64 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
65 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
66 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
69 ; SSE42-LABEL: test_pcmpgtq:
71 ; SSE42-NEXT: pcmpeqd %xmm1, %xmm1
72 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
75 ; AVX-LABEL: test_pcmpgtq:
77 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
78 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
80 %sign = ashr <2 x i64> %x, <i64 63, i64 63>
81 %not = xor <2 x i64> %sign, <i64 -1, i64 -1>
; v1i128 is not a legal vector type, so this lowers to scalar code: the sign
; comes from the high half (%rsi), and the sar/not result is copied into both
; halves of the i128 return (%rax and %rdx are equal after ashr by 127 + not).
85 define <1 x i128> @test_strange_type(<1 x i128> %x) {
86 ; CHECK-LABEL: test_strange_type:
88 ; CHECK-NEXT: movq %rsi, %rax
89 ; CHECK-NEXT: sarq $63, %rax
90 ; CHECK-NEXT: notq %rax
91 ; CHECK-NEXT: movq %rax, %rdx
93 %sign = ashr <1 x i128> %x, <i128 127>
94 %not = xor <1 x i128> %sign, <i128 -1>
; 256-bit isPositive: SSE splits into two 128-bit pcmpgtb ops sharing one
; all-ones register. AVX1 has no 256-bit integer compare, so each half is
; compared against zero and the combined result is inverted with an all-ones
; ymm built by vcmptrueps. AVX2 does it in a single ymm pcmpgtb.
98 define <32 x i8> @test_pcmpgtb_256(<32 x i8> %x) {
99 ; SSE-LABEL: test_pcmpgtb_256:
101 ; SSE-NEXT: pcmpeqd %xmm2, %xmm2
102 ; SSE-NEXT: pcmpgtb %xmm2, %xmm0
103 ; SSE-NEXT: pcmpgtb %xmm2, %xmm1
106 ; AVX1-LABEL: test_pcmpgtb_256:
108 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
109 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
110 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
111 ; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
112 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
113 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
114 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
115 ; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
118 ; AVX2-LABEL: test_pcmpgtb_256:
120 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
121 ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
123 %sign = ashr <32 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
124 %not = xor <32 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; 256-bit i16 case: SSE gets two pcmpgtw vs. shared all-ones. AVX1 currently
; keeps the literal per-half arithmetic shift (vpsraw $15) plus a full-width
; not (vcmptrueps + vxorps) rather than forming compares; AVX2 gets one ymm pcmpgtw.
128 define <16 x i16> @test_pcmpgtw_256(<16 x i16> %x) {
129 ; SSE-LABEL: test_pcmpgtw_256:
131 ; SSE-NEXT: pcmpeqd %xmm2, %xmm2
132 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0
133 ; SSE-NEXT: pcmpgtw %xmm2, %xmm1
136 ; AVX1-LABEL: test_pcmpgtw_256:
138 ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1
139 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
140 ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
141 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
142 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
143 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
144 ; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
147 ; AVX2-LABEL: test_pcmpgtw_256:
149 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
150 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
152 %sign = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
153 %not = xor <16 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
; 256-bit i32 case: mirrors the i16 version above — AVX1 emits per-half
; vpsrad $31 plus a ymm-wide not instead of compares; SSE and AVX2 use pcmpgtd.
157 define <8 x i32> @test_pcmpgtd_256(<8 x i32> %x) {
158 ; SSE-LABEL: test_pcmpgtd_256:
160 ; SSE-NEXT: pcmpeqd %xmm2, %xmm2
161 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0
162 ; SSE-NEXT: pcmpgtd %xmm2, %xmm1
165 ; AVX1-LABEL: test_pcmpgtd_256:
167 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
168 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
169 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
170 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
171 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
172 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
173 ; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
176 ; AVX2-LABEL: test_pcmpgtd_256:
178 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
179 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
181 %sign = ashr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
182 %not = xor <8 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
; 256-bit i64 case: SSE2 applies the pshufd+pcmpgtd sign-splat trick per half;
; SSE4.2 uses two pcmpgtq; AVX1 compares each half against zero and inverts
; with a vcmptrueps-built all-ones ymm; AVX2 uses a single vpcmpgtq.
186 define <4 x i64> @test_pcmpgtq_256(<4 x i64> %x) {
187 ; SSE2-LABEL: test_pcmpgtq_256:
189 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
190 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
191 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
192 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
193 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
196 ; SSE42-LABEL: test_pcmpgtq_256:
198 ; SSE42-NEXT: pcmpeqd %xmm2, %xmm2
199 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
200 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm1
203 ; AVX1-LABEL: test_pcmpgtq_256:
205 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
206 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
207 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1
208 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0
209 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
210 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
211 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
212 ; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
215 ; AVX2-LABEL: test_pcmpgtq_256:
217 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
218 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
220 %sign = ashr <4 x i64> %x, <i64 63, i64 63, i64 63, i64 63>
221 %not = xor <4 x i64> %sign, <i64 -1, i64 -1, i64 -1, i64 -1>
; zext(icmp eq) at byte width: compare, then AND with a constant-pool vector of
; 1s to get 0/1 lanes (x86 has no byte shift that could do this instead).
225 define <16 x i8> @cmpeq_zext_v16i8(<16 x i8> %a, <16 x i8> %b) {
226 ; SSE-LABEL: cmpeq_zext_v16i8:
228 ; SSE-NEXT: pcmpeqb %xmm1, %xmm0
229 ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
232 ; AVX-LABEL: cmpeq_zext_v16i8:
234 ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
235 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
237 %cmp = icmp eq <16 x i8> %a, %b
238 %zext = zext <16 x i1> %cmp to <16 x i8>
; zext(icmp eq) at word width: compare, then shift the all-ones result right
; logically by 15 to leave 0/1. AVX1 splits halves and uses a constant AND
; instead; AVX2 does the compare+shift on the full ymm.
242 define <16 x i16> @cmpeq_zext_v16i16(<16 x i16> %a, <16 x i16> %b) {
243 ; SSE-LABEL: cmpeq_zext_v16i16:
245 ; SSE-NEXT: pcmpeqw %xmm2, %xmm0
246 ; SSE-NEXT: psrlw $15, %xmm0
247 ; SSE-NEXT: pcmpeqw %xmm3, %xmm1
248 ; SSE-NEXT: psrlw $15, %xmm1
251 ; AVX1-LABEL: cmpeq_zext_v16i16:
253 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
254 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
255 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2
256 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
257 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
258 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
261 ; AVX2-LABEL: cmpeq_zext_v16i16:
263 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
264 ; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
266 %cmp = icmp eq <16 x i16> %a, %b
267 %zext = zext <16 x i1> %cmp to <16 x i16>
; zext(icmp eq) at dword width: pcmpeqd followed by a logical shift right by 31.
271 define <4 x i32> @cmpeq_zext_v4i32(<4 x i32> %a, <4 x i32> %b) {
272 ; SSE-LABEL: cmpeq_zext_v4i32:
274 ; SSE-NEXT: pcmpeqd %xmm1, %xmm0
275 ; SSE-NEXT: psrld $31, %xmm0
278 ; AVX-LABEL: cmpeq_zext_v4i32:
280 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
281 ; AVX-NEXT: vpsrld $31, %xmm0, %xmm0
283 %cmp = icmp eq <4 x i32> %a, %b
284 %zext = zext <4 x i1> %cmp to <4 x i32>
; zext(icmp eq) at qword width: SSE2 has no pcmpeqq, so 64-bit equality is
; emulated per half with pcmpeqd plus a pshufd of the swapped dwords and ANDs;
; the [1,1] splat mask produces the final 0/1 lanes. SSE4.2 uses pcmpeqq +
; psrlq $63; AVX1 splits halves and ANDs with a constant; AVX2 is one ymm pair.
288 define <4 x i64> @cmpeq_zext_v4i64(<4 x i64> %a, <4 x i64> %b) {
289 ; SSE2-LABEL: cmpeq_zext_v4i64:
291 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
292 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
293 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,1]
294 ; SSE2-NEXT: pand %xmm4, %xmm2
295 ; SSE2-NEXT: pand %xmm2, %xmm0
296 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm1
297 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
298 ; SSE2-NEXT: pand %xmm4, %xmm2
299 ; SSE2-NEXT: pand %xmm2, %xmm1
302 ; SSE42-LABEL: cmpeq_zext_v4i64:
304 ; SSE42-NEXT: pcmpeqq %xmm2, %xmm0
305 ; SSE42-NEXT: psrlq $63, %xmm0
306 ; SSE42-NEXT: pcmpeqq %xmm3, %xmm1
307 ; SSE42-NEXT: psrlq $63, %xmm1
310 ; AVX1-LABEL: cmpeq_zext_v4i64:
312 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
313 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
314 ; AVX1-NEXT: vpcmpeqq %xmm2, %xmm3, %xmm2
315 ; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
316 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
317 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
320 ; AVX2-LABEL: cmpeq_zext_v4i64:
322 ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
323 ; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0
325 %cmp = icmp eq <4 x i64> %a, %b
326 %zext = zext <4 x i1> %cmp to <4 x i64>
; zext(icmp sgt) on 32 bytes: compare then AND with a vector of 1s — bytes
; cannot use a shift to normalize to 0/1. SSE shares the 1s constant across
; both halves; AVX1 splits and ANDs the ymm; AVX2 compares and ANDs full-width.
330 define <32 x i8> @cmpgt_zext_v32i8(<32 x i8> %a, <32 x i8> %b) {
331 ; SSE-LABEL: cmpgt_zext_v32i8:
333 ; SSE-NEXT: pcmpgtb %xmm2, %xmm0
334 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
335 ; SSE-NEXT: pand %xmm2, %xmm0
336 ; SSE-NEXT: pcmpgtb %xmm3, %xmm1
337 ; SSE-NEXT: pand %xmm2, %xmm1
340 ; AVX1-LABEL: cmpgt_zext_v32i8:
342 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
343 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
344 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
345 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
346 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
347 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
350 ; AVX2-LABEL: cmpgt_zext_v32i8:
352 ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
353 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
355 %cmp = icmp sgt <32 x i8> %a, %b
356 %zext = zext <32 x i1> %cmp to <32 x i8>
; zext(icmp sgt) at word width: pcmpgtw then logical shift right by 15 to 0/1.
360 define <8 x i16> @cmpgt_zext_v8i16(<8 x i16> %a, <8 x i16> %b) {
361 ; SSE-LABEL: cmpgt_zext_v8i16:
363 ; SSE-NEXT: pcmpgtw %xmm1, %xmm0
364 ; SSE-NEXT: psrlw $15, %xmm0
367 ; AVX-LABEL: cmpgt_zext_v8i16:
369 ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
370 ; AVX-NEXT: vpsrlw $15, %xmm0, %xmm0
372 %cmp = icmp sgt <8 x i16> %a, %b
373 %zext = zext <8 x i1> %cmp to <8 x i16>
; zext(icmp sgt) on 8 dwords: compare then psrld $31 per half on SSE; AVX1
; splits and ANDs with a constant ymm; AVX2 compares and shifts full-width.
377 define <8 x i32> @cmpgt_zext_v8i32(<8 x i32> %a, <8 x i32> %b) {
378 ; SSE-LABEL: cmpgt_zext_v8i32:
380 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0
381 ; SSE-NEXT: psrld $31, %xmm0
382 ; SSE-NEXT: pcmpgtd %xmm3, %xmm1
383 ; SSE-NEXT: psrld $31, %xmm1
386 ; AVX1-LABEL: cmpgt_zext_v8i32:
388 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
389 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
390 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
391 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
392 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
393 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
396 ; AVX2-LABEL: cmpgt_zext_v8i32:
398 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
399 ; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
401 %cmp = icmp sgt <8 x i32> %a, %b
402 %zext = zext <8 x i1> %cmp to <8 x i32>
; zext(icmp sgt) at qword width: SSE2 has no pcmpgtq, so both operands are
; XORed with the per-dword sign-bit constant (0x80000000) and the 64-bit
; signed '>' is rebuilt from 32-bit pcmpgtd/pcmpeqd results, then masked with
; a 1s constant. SSE4.2/AVX simply use pcmpgtq + psrlq $63.
406 define <2 x i64> @cmpgt_zext_v2i64(<2 x i64> %a, <2 x i64> %b) {
407 ; SSE2-LABEL: cmpgt_zext_v2i64:
409 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
410 ; SSE2-NEXT: pxor %xmm2, %xmm1
411 ; SSE2-NEXT: pxor %xmm2, %xmm0
412 ; SSE2-NEXT: movdqa %xmm0, %xmm2
413 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
414 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
415 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
416 ; SSE2-NEXT: pand %xmm2, %xmm1
417 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
418 ; SSE2-NEXT: por %xmm1, %xmm0
419 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
422 ; SSE42-LABEL: cmpgt_zext_v2i64:
424 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
425 ; SSE42-NEXT: psrlq $63, %xmm0
428 ; AVX-LABEL: cmpgt_zext_v2i64:
430 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
431 ; AVX-NEXT: vpsrlq $63, %xmm0, %xmm0
433 %cmp = icmp sgt <2 x i64> %a, %b
434 %zext = zext <2 x i1> %cmp to <2 x i64>
438 ; Test that we optimize a zext of a vector setcc ne zero where all bits but the
439 ; lsb are known to be zero.
; After lshr by 15 only the lsb of each i16 can be nonzero, so the
; 'icmp ne 0' + zext pair folds to just the shift plus a plain widening
; of the lanes (punpck/pmovzx), with no compare instruction emitted.
440 define <8 x i32> @cmpne_knownzeros_zext_v8i16_v8i32(<8 x i16> %x) {
441 ; SSE2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
443 ; SSE2-NEXT: movdqa %xmm0, %xmm1
444 ; SSE2-NEXT: psrlw $15, %xmm1
445 ; SSE2-NEXT: pxor %xmm2, %xmm2
446 ; SSE2-NEXT: movdqa %xmm1, %xmm0
447 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
448 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
451 ; SSE42-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
453 ; SSE42-NEXT: movdqa %xmm0, %xmm1
454 ; SSE42-NEXT: psrlw $15, %xmm1
455 ; SSE42-NEXT: pxor %xmm2, %xmm2
456 ; SSE42-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
457 ; SSE42-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
460 ; AVX1-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
462 ; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
463 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
464 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
465 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
466 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
469 ; AVX2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
471 ; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0
472 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
474 %a = lshr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
475 %b = icmp ne <8 x i16> %a, zeroinitializer
476 %c = zext <8 x i1> %b to <8 x i32>
; Same-width variant: with only the lsb possibly set, the ne-compare and zext
; fold away entirely and the whole function reduces to the psrld $31 shift(s).
480 define <8 x i32> @cmpne_knownzeros_zext_v8i32_v8i32(<8 x i32> %x) {
481 ; SSE-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
483 ; SSE-NEXT: psrld $31, %xmm0
484 ; SSE-NEXT: psrld $31, %xmm1
487 ; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
489 ; AVX1-NEXT: vpsrld $31, %xmm0, %xmm1
490 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
491 ; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
492 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
495 ; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
497 ; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
499 %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
500 %b = icmp ne <8 x i32> %a, zeroinitializer
501 %c = zext <8 x i1> %b to <8 x i32>
; Narrowing variant: shift each half down to 0/1 dwords, then pack to words
; (packuswb on SSE2, packusdw on SSE4.2/AVX); the compare itself folds away.
; vzeroupper is expected on AVX since a ymm was live but an xmm is returned.
505 define <8 x i16> @cmpne_knownzeros_zext_v8i32_v8i16(<8 x i32> %x) {
506 ; SSE2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
508 ; SSE2-NEXT: psrld $31, %xmm1
509 ; SSE2-NEXT: psrld $31, %xmm0
510 ; SSE2-NEXT: packuswb %xmm1, %xmm0
513 ; SSE42-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
515 ; SSE42-NEXT: psrld $31, %xmm1
516 ; SSE42-NEXT: psrld $31, %xmm0
517 ; SSE42-NEXT: packusdw %xmm1, %xmm0
520 ; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
522 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
523 ; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
524 ; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
525 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
526 ; AVX1-NEXT: vzeroupper
529 ; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
531 ; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
532 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
533 ; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
534 ; AVX2-NEXT: vzeroupper
536 %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
537 %b = icmp ne <8 x i32> %a, zeroinitializer
538 %c = zext <8 x i1> %b to <8 x i16>
; Testing only the sign bit, comparing ne-zero, and sign-extending is just a
; broadcast of the sign bit into every lane: a single psrad $31.
543 define <4 x i32> @cmpeq_one_mask_bit(<4 x i32> %mask) {
544 ; SSE-LABEL: cmpeq_one_mask_bit:
546 ; SSE-NEXT: psrad $31, %xmm0
549 ; AVX-LABEL: cmpeq_one_mask_bit:
551 ; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
553 %mask_signbit = and <4 x i32> %mask, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
554 %mask_bool = icmp ne <4 x i32> %mask_signbit, zeroinitializer
555 %mask_bool_ext = sext <4 x i1> %mask_bool to <4 x i32>
556 ret <4 x i32> %mask_bool_ext