1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
7 ; PR33276 - https://bugs.llvm.org/show_bug.cgi?id=33276
8 ; If both operands of an unsigned icmp are known non-negative, then
9 ; we don't need to flip the sign bits in order to map to signed pcmpgt*.
; ugt_v2i64 - unsigned greater-than on <2 x i64> whose lanes were both
; logically shifted right by one. The lshr zero-fills the top bit of every
; lane, so both compare operands are non-negative when read as signed values.
; Expected output: the AVX runs compare directly with vpcmpgtq (x > y) and no
; sign-bit flip. The SSE runs still xor both operands with the
; [2147483648,2147483648] constant and assemble the 64-bit result from
; pcmpgtd/pcmpeqd plus pshufd/pand/por.
11 define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) {
12 ; SSE-LABEL: ugt_v2i64:
14 ; SSE-NEXT: psrlq $1, %xmm0
15 ; SSE-NEXT: psrlq $1, %xmm1
16 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
17 ; SSE-NEXT: pxor %xmm2, %xmm1
18 ; SSE-NEXT: pxor %xmm2, %xmm0
19 ; SSE-NEXT: movdqa %xmm0, %xmm2
20 ; SSE-NEXT: pcmpgtd %xmm1, %xmm2
21 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
22 ; SSE-NEXT: pcmpeqd %xmm1, %xmm0
23 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
24 ; SSE-NEXT: pand %xmm3, %xmm1
25 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
26 ; SSE-NEXT: por %xmm1, %xmm0
29 ; AVX-LABEL: ugt_v2i64:
31 ; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
32 ; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
33 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; Clear the top bit of every lane of both inputs, then compare unsigned.
35 %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
36 %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
37 %cmp = icmp ugt <2 x i64> %sh1, %sh2
; ult_v2i64 - unsigned less-than on <2 x i64> whose lanes were both logically
; shifted right by one (top bit of every lane is therefore zero).
; Expected output: the AVX runs use vpcmpgtq with the operands swapped
; (y > x). The SSE runs keep the xor with [2147483648,2147483648] and build
; the 64-bit compare from pcmpgtd/pcmpeqd plus shuffles, also with the
; operands swapped relative to the ugt case.
41 define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) {
42 ; SSE-LABEL: ult_v2i64:
44 ; SSE-NEXT: psrlq $1, %xmm0
45 ; SSE-NEXT: psrlq $1, %xmm1
46 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
47 ; SSE-NEXT: pxor %xmm2, %xmm0
48 ; SSE-NEXT: pxor %xmm2, %xmm1
49 ; SSE-NEXT: movdqa %xmm1, %xmm2
50 ; SSE-NEXT: pcmpgtd %xmm0, %xmm2
51 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
52 ; SSE-NEXT: pcmpeqd %xmm0, %xmm1
53 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
54 ; SSE-NEXT: pand %xmm3, %xmm1
55 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
56 ; SSE-NEXT: por %xmm1, %xmm0
59 ; AVX-LABEL: ult_v2i64:
61 ; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
62 ; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
63 ; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; Clear the top bit of every lane of both inputs, then compare unsigned.
65 %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
66 %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
67 %cmp = icmp ult <2 x i64> %sh1, %sh2
; uge_v2i64 - unsigned greater-or-equal on <2 x i64> whose lanes were both
; logically shifted right by one (top bit of every lane is therefore zero).
; Expected output: x >= y is produced as NOT(y > x). The AVX runs use
; vpcmpgtq with swapped operands and then xor with an all-ones register
; (vpcmpeqd of a register with itself). The SSE runs do the same inversion
; after the xor-with-constant plus pcmpgtd/pcmpeqd/shuffle sequence.
71 define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
72 ; SSE-LABEL: uge_v2i64:
74 ; SSE-NEXT: psrlq $1, %xmm0
75 ; SSE-NEXT: psrlq $1, %xmm1
76 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
77 ; SSE-NEXT: pxor %xmm2, %xmm0
78 ; SSE-NEXT: pxor %xmm2, %xmm1
79 ; SSE-NEXT: movdqa %xmm1, %xmm2
80 ; SSE-NEXT: pcmpgtd %xmm0, %xmm2
81 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
82 ; SSE-NEXT: pcmpeqd %xmm0, %xmm1
83 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
84 ; SSE-NEXT: pand %xmm3, %xmm0
85 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
86 ; SSE-NEXT: por %xmm0, %xmm1
87 ; SSE-NEXT: pcmpeqd %xmm0, %xmm0
88 ; SSE-NEXT: pxor %xmm1, %xmm0
91 ; AVX-LABEL: uge_v2i64:
93 ; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
94 ; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
95 ; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
96 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
97 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; Clear the top bit of every lane of both inputs, then compare unsigned.
99 %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
100 %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
101 %cmp = icmp uge <2 x i64> %sh1, %sh2
; ule_v2i64 - unsigned less-or-equal on <2 x i64> whose lanes were both
; logically shifted right by one (top bit of every lane is therefore zero).
; Expected output: x <= y is produced as NOT(x > y). The AVX runs use
; vpcmpgtq (x > y) followed by xor with an all-ones register. The SSE runs
; apply the same inversion after the xor-with-constant plus
; pcmpgtd/pcmpeqd/shuffle sequence.
105 define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
106 ; SSE-LABEL: ule_v2i64:
108 ; SSE-NEXT: psrlq $1, %xmm0
109 ; SSE-NEXT: psrlq $1, %xmm1
110 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
111 ; SSE-NEXT: pxor %xmm2, %xmm1
112 ; SSE-NEXT: pxor %xmm2, %xmm0
113 ; SSE-NEXT: movdqa %xmm0, %xmm2
114 ; SSE-NEXT: pcmpgtd %xmm1, %xmm2
115 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
116 ; SSE-NEXT: pcmpeqd %xmm1, %xmm0
117 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
118 ; SSE-NEXT: pand %xmm3, %xmm0
119 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
120 ; SSE-NEXT: por %xmm0, %xmm1
121 ; SSE-NEXT: pcmpeqd %xmm0, %xmm0
122 ; SSE-NEXT: pxor %xmm1, %xmm0
125 ; AVX-LABEL: ule_v2i64:
127 ; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
128 ; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
129 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
130 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
131 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; Clear the top bit of every lane of both inputs, then compare unsigned.
133 %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
134 %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
135 %cmp = icmp ule <2 x i64> %sh1, %sh2
; ugt_v4i32 - unsigned greater-than on <4 x i32> whose lanes were both
; logically shifted right by one (top bit of every lane is therefore zero).
; Expected output on every run line: shift both inputs, then a single signed
; pcmpgtd/vpcmpgtd (x > y) with no sign-bit flip.
139 define <4 x i1> @ugt_v4i32(<4 x i32> %x, <4 x i32> %y) {
140 ; SSE-LABEL: ugt_v4i32:
142 ; SSE-NEXT: psrld $1, %xmm0
143 ; SSE-NEXT: psrld $1, %xmm1
144 ; SSE-NEXT: pcmpgtd %xmm1, %xmm0
147 ; AVX-LABEL: ugt_v4i32:
149 ; AVX-NEXT: vpsrld $1, %xmm0, %xmm0
150 ; AVX-NEXT: vpsrld $1, %xmm1, %xmm1
151 ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; Clear the top bit of every lane of both inputs, then compare unsigned.
153 %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
154 %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
155 %cmp = icmp ugt <4 x i32> %sh1, %sh2
; ult_v4i32 - unsigned less-than on <4 x i32> whose lanes were both logically
; shifted right by one (top bit of every lane is therefore zero).
; Expected output: a single signed pcmpgtd/vpcmpgtd with swapped operands
; (y > x). The SSE runs need one extra movdqa to copy the result from xmm1
; into xmm0; the three-operand AVX form writes xmm0 directly.
159 define <4 x i1> @ult_v4i32(<4 x i32> %x, <4 x i32> %y) {
160 ; SSE-LABEL: ult_v4i32:
162 ; SSE-NEXT: psrld $1, %xmm0
163 ; SSE-NEXT: psrld $1, %xmm1
164 ; SSE-NEXT: pcmpgtd %xmm0, %xmm1
165 ; SSE-NEXT: movdqa %xmm1, %xmm0
168 ; AVX-LABEL: ult_v4i32:
170 ; AVX-NEXT: vpsrld $1, %xmm0, %xmm0
171 ; AVX-NEXT: vpsrld $1, %xmm1, %xmm1
172 ; AVX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; Clear the top bit of every lane of both inputs, then compare unsigned.
174 %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
175 %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
176 %cmp = icmp ult <4 x i32> %sh1, %sh2
; uge_v4i32 - unsigned greater-or-equal on <4 x i32> whose lanes were both
; logically shifted right by one (top bit of every lane is therefore zero).
; Expected output differs per feature level:
;  - SSE2 run - signed pcmpgtd (y > x), then inverted by xor with an all-ones
;    register (pcmpeqd of a register with itself).
;  - SSE4.1 and AVX runs - unsigned max followed by equality, i.e.
;    x == umax(x, y), using pmaxud/pcmpeqd.
180 define <4 x i1> @uge_v4i32(<4 x i32> %x, <4 x i32> %y) {
181 ; SSE2-LABEL: uge_v4i32:
183 ; SSE2-NEXT: psrld $1, %xmm0
184 ; SSE2-NEXT: psrld $1, %xmm1
185 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
186 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
187 ; SSE2-NEXT: pxor %xmm1, %xmm0
190 ; SSE41-LABEL: uge_v4i32:
192 ; SSE41-NEXT: psrld $1, %xmm0
193 ; SSE41-NEXT: psrld $1, %xmm1
194 ; SSE41-NEXT: pmaxud %xmm0, %xmm1
195 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
198 ; AVX-LABEL: uge_v4i32:
200 ; AVX-NEXT: vpsrld $1, %xmm0, %xmm0
201 ; AVX-NEXT: vpsrld $1, %xmm1, %xmm1
202 ; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm1
203 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; Clear the top bit of every lane of both inputs, then compare unsigned.
205 %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
206 %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
207 %cmp = icmp uge <4 x i32> %sh1, %sh2
; ule_v4i32 - unsigned less-or-equal on <4 x i32> whose lanes were both
; logically shifted right by one (top bit of every lane is therefore zero).
; Expected output differs per feature level:
;  - SSE2 run - signed pcmpgtd (x > y), then inverted by xor with an all-ones
;    register.
;  - SSE4.1 and AVX runs - unsigned min followed by equality, i.e.
;    x == umin(x, y), using pminud/pcmpeqd.
211 define <4 x i1> @ule_v4i32(<4 x i32> %x, <4 x i32> %y) {
212 ; SSE2-LABEL: ule_v4i32:
214 ; SSE2-NEXT: psrld $1, %xmm0
215 ; SSE2-NEXT: psrld $1, %xmm1
216 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
217 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
218 ; SSE2-NEXT: pxor %xmm1, %xmm0
221 ; SSE41-LABEL: ule_v4i32:
223 ; SSE41-NEXT: psrld $1, %xmm0
224 ; SSE41-NEXT: psrld $1, %xmm1
225 ; SSE41-NEXT: pminud %xmm0, %xmm1
226 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
229 ; AVX-LABEL: ule_v4i32:
231 ; AVX-NEXT: vpsrld $1, %xmm0, %xmm0
232 ; AVX-NEXT: vpsrld $1, %xmm1, %xmm1
233 ; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm1
234 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; Clear the top bit of every lane of both inputs, then compare unsigned.
236 %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
237 %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
238 %cmp = icmp ule <4 x i32> %sh1, %sh2
; ugt_v8i16 - unsigned greater-than on <8 x i16> whose lanes were both
; logically shifted right by one (top bit of every lane is therefore zero).
; Expected output on every run line: shift both inputs, then a single signed
; pcmpgtw/vpcmpgtw (x > y) with no sign-bit flip.
242 define <8 x i1> @ugt_v8i16(<8 x i16> %x, <8 x i16> %y) {
243 ; SSE-LABEL: ugt_v8i16:
245 ; SSE-NEXT: psrlw $1, %xmm0
246 ; SSE-NEXT: psrlw $1, %xmm1
247 ; SSE-NEXT: pcmpgtw %xmm1, %xmm0
250 ; AVX-LABEL: ugt_v8i16:
252 ; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
253 ; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
254 ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; Clear the top bit of every lane of both inputs, then compare unsigned.
256 %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
257 %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
258 %cmp = icmp ugt <8 x i16> %sh1, %sh2
; ult_v8i16 - unsigned less-than on <8 x i16> whose lanes were both logically
; shifted right by one (top bit of every lane is therefore zero).
; Expected output: a single signed pcmpgtw/vpcmpgtw with swapped operands
; (y > x). The SSE runs need one extra movdqa to copy the result from xmm1
; into xmm0.
262 define <8 x i1> @ult_v8i16(<8 x i16> %x, <8 x i16> %y) {
263 ; SSE-LABEL: ult_v8i16:
265 ; SSE-NEXT: psrlw $1, %xmm0
266 ; SSE-NEXT: psrlw $1, %xmm1
267 ; SSE-NEXT: pcmpgtw %xmm0, %xmm1
268 ; SSE-NEXT: movdqa %xmm1, %xmm0
271 ; AVX-LABEL: ult_v8i16:
273 ; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
274 ; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
275 ; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; Clear the top bit of every lane of both inputs, then compare unsigned.
277 %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
278 %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
279 %cmp = icmp ult <8 x i16> %sh1, %sh2
; uge_v8i16 - unsigned greater-or-equal on <8 x i16> whose lanes were both
; logically shifted right by one (top bit of every lane is therefore zero).
; Expected output differs per feature level:
;  - SSE2 run - signed pcmpgtw (y > x), then inverted by xor with an all-ones
;    register (pcmpeqd of a register with itself).
;  - SSE4.1 and AVX runs - x == umax(x, y) via pmaxuw/pcmpeqw.
283 define <8 x i1> @uge_v8i16(<8 x i16> %x, <8 x i16> %y) {
284 ; SSE2-LABEL: uge_v8i16:
286 ; SSE2-NEXT: psrlw $1, %xmm0
287 ; SSE2-NEXT: psrlw $1, %xmm1
288 ; SSE2-NEXT: pcmpgtw %xmm0, %xmm1
289 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
290 ; SSE2-NEXT: pxor %xmm1, %xmm0
293 ; SSE41-LABEL: uge_v8i16:
295 ; SSE41-NEXT: psrlw $1, %xmm0
296 ; SSE41-NEXT: psrlw $1, %xmm1
297 ; SSE41-NEXT: pmaxuw %xmm0, %xmm1
298 ; SSE41-NEXT: pcmpeqw %xmm1, %xmm0
301 ; AVX-LABEL: uge_v8i16:
303 ; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
304 ; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
305 ; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
306 ; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; Clear the top bit of every lane of both inputs, then compare unsigned.
308 %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
309 %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
310 %cmp = icmp uge <8 x i16> %sh1, %sh2
; ule_v8i16 - unsigned less-or-equal on <8 x i16> whose lanes were both
; logically shifted right by one (top bit of every lane is therefore zero).
; Expected output differs per feature level:
;  - SSE2 run - signed pcmpgtw (x > y), then inverted by xor with an all-ones
;    register.
;  - SSE4.1 and AVX runs - x == umin(x, y) via pminuw/pcmpeqw.
314 define <8 x i1> @ule_v8i16(<8 x i16> %x, <8 x i16> %y) {
315 ; SSE2-LABEL: ule_v8i16:
317 ; SSE2-NEXT: psrlw $1, %xmm0
318 ; SSE2-NEXT: psrlw $1, %xmm1
319 ; SSE2-NEXT: pcmpgtw %xmm1, %xmm0
320 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
321 ; SSE2-NEXT: pxor %xmm1, %xmm0
324 ; SSE41-LABEL: ule_v8i16:
326 ; SSE41-NEXT: psrlw $1, %xmm0
327 ; SSE41-NEXT: psrlw $1, %xmm1
328 ; SSE41-NEXT: pminuw %xmm0, %xmm1
329 ; SSE41-NEXT: pcmpeqw %xmm1, %xmm0
332 ; AVX-LABEL: ule_v8i16:
334 ; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
335 ; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
336 ; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm1
337 ; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; Clear the top bit of every lane of both inputs, then compare unsigned.
339 %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
340 %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
341 %cmp = icmp ule <8 x i16> %sh1, %sh2
; ugt_v16i8 - unsigned greater-than on <16 x i8> whose lanes were both
; logically shifted right by one (top bit of every lane is therefore zero).
; Expected output: the per-byte shift appears as a 16-bit psrlw followed by a
; pand with a splat of 127, applied to both inputs, and the compare is a
; single signed pcmpgtb/vpcmpgtb (x > y). The runs differ only in how the 127
; mask is materialised (movdqa, vbroadcastss, or vpbroadcastb).
345 define <16 x i1> @ugt_v16i8(<16 x i8> %x, <16 x i8> %y) {
346 ; SSE-LABEL: ugt_v16i8:
348 ; SSE-NEXT: psrlw $1, %xmm0
349 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
350 ; SSE-NEXT: pand %xmm2, %xmm0
351 ; SSE-NEXT: psrlw $1, %xmm1
352 ; SSE-NEXT: pand %xmm2, %xmm1
353 ; SSE-NEXT: pcmpgtb %xmm1, %xmm0
356 ; AVX1-LABEL: ugt_v16i8:
358 ; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
359 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
360 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
361 ; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
362 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
363 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
366 ; AVX2-LABEL: ugt_v16i8:
368 ; AVX2-NEXT: vpsrlw $1, %xmm0, %xmm0
369 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
370 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
371 ; AVX2-NEXT: vpsrlw $1, %xmm1, %xmm1
372 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
373 ; AVX2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; Clear the top bit of every lane of both inputs, then compare unsigned.
375 %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
376 %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
377 %cmp = icmp ugt <16 x i8> %sh1, %sh2
; ult_v16i8 - unsigned less-than on <16 x i8> whose lanes were both logically
; shifted right by one (top bit of every lane is therefore zero).
; Expected output: psrlw plus pand with a splat of 127 on both inputs, then a
; single signed pcmpgtb/vpcmpgtb with swapped operands (y > x). In the SSE
; runs the second pand writes into the mask register (xmm2), the compare
; result lands there too, and a final movdqa copies it to xmm0.
381 define <16 x i1> @ult_v16i8(<16 x i8> %x, <16 x i8> %y) {
382 ; SSE-LABEL: ult_v16i8:
384 ; SSE-NEXT: psrlw $1, %xmm0
385 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
386 ; SSE-NEXT: pand %xmm2, %xmm0
387 ; SSE-NEXT: psrlw $1, %xmm1
388 ; SSE-NEXT: pand %xmm1, %xmm2
389 ; SSE-NEXT: pcmpgtb %xmm0, %xmm2
390 ; SSE-NEXT: movdqa %xmm2, %xmm0
393 ; AVX1-LABEL: ult_v16i8:
395 ; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
396 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
397 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
398 ; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
399 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
400 ; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
403 ; AVX2-LABEL: ult_v16i8:
405 ; AVX2-NEXT: vpsrlw $1, %xmm0, %xmm0
406 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
407 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
408 ; AVX2-NEXT: vpsrlw $1, %xmm1, %xmm1
409 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
410 ; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; Clear the top bit of every lane of both inputs, then compare unsigned.
412 %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
413 %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
414 %cmp = icmp ult <16 x i8> %sh1, %sh2
; uge_v16i8 - unsigned greater-or-equal on <16 x i8> whose lanes were both
; logically shifted right by one (top bit of every lane is therefore zero).
; Expected output on every run line: psrlw plus pand with a splat of 127 on
; both inputs, then x == umax(x, y) via pmaxub/pcmpeqb. Unlike the wider
; element types, the SSE2 and SSE4.1 runs share one expected sequence here.
418 define <16 x i1> @uge_v16i8(<16 x i8> %x, <16 x i8> %y) {
419 ; SSE-LABEL: uge_v16i8:
421 ; SSE-NEXT: psrlw $1, %xmm0
422 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
423 ; SSE-NEXT: pand %xmm2, %xmm0
424 ; SSE-NEXT: psrlw $1, %xmm1
425 ; SSE-NEXT: pand %xmm1, %xmm2
426 ; SSE-NEXT: pmaxub %xmm0, %xmm2
427 ; SSE-NEXT: pcmpeqb %xmm2, %xmm0
430 ; AVX1-LABEL: uge_v16i8:
432 ; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
433 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
434 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
435 ; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
436 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
437 ; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
438 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
441 ; AVX2-LABEL: uge_v16i8:
443 ; AVX2-NEXT: vpsrlw $1, %xmm0, %xmm0
444 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
445 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
446 ; AVX2-NEXT: vpsrlw $1, %xmm1, %xmm1
447 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
448 ; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
449 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; Clear the top bit of every lane of both inputs, then compare unsigned.
451 %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
452 %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
453 %cmp = icmp uge <16 x i8> %sh1, %sh2
; ule_v16i8 - unsigned less-or-equal on <16 x i8> whose lanes were both
; logically shifted right by one (top bit of every lane is therefore zero).
; Expected output on every run line: psrlw plus pand with a splat of 127 on
; both inputs, then x == umin(x, y) via pminub/pcmpeqb.
457 define <16 x i1> @ule_v16i8(<16 x i8> %x, <16 x i8> %y) {
458 ; SSE-LABEL: ule_v16i8:
460 ; SSE-NEXT: psrlw $1, %xmm0
461 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
462 ; SSE-NEXT: pand %xmm2, %xmm0
463 ; SSE-NEXT: psrlw $1, %xmm1
464 ; SSE-NEXT: pand %xmm1, %xmm2
465 ; SSE-NEXT: pminub %xmm0, %xmm2
466 ; SSE-NEXT: pcmpeqb %xmm2, %xmm0
469 ; AVX1-LABEL: ule_v16i8:
471 ; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
472 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
473 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
474 ; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
475 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
476 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm1
477 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
480 ; AVX2-LABEL: ule_v16i8:
482 ; AVX2-NEXT: vpsrlw $1, %xmm0, %xmm0
483 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
484 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
485 ; AVX2-NEXT: vpsrlw $1, %xmm1, %xmm1
486 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
487 ; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm1
488 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; Clear the top bit of every lane of both inputs, then compare unsigned.
490 %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
491 %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
492 %cmp = icmp ule <16 x i8> %sh1, %sh2
; PR47448_uge - splats a scalar across <8 x i16>, compares it unsigned
; greater-or-equal against the constant vector <0,1,2,3,4,5,6,7>, and
; sign-extends the i1 result lanes to i16.
; Expected output: the scalar is masked with 'andl $7' and broadcast
; (pshuflw+pshufd, or vpbroadcastw on the AVX2 run). The compare is then
;  - SSE2 run - signed pcmpgtw (constant > splat) inverted by xor with an
;    all-ones register;
;  - SSE4.1, AVX1 and AVX2 runs - splat == umax(splat, constant) via
;    pmaxuw/pcmpeqw.
; NOTE(review): the definition of %2 is not visible in this excerpt; the
; expected 'andl $7, %edi' suggests it is %0 masked to the range [0,7] -
; confirm.
496 define <8 x i16> @PR47448_uge(i16 signext %0) {
497 ; SSE2-LABEL: PR47448_uge:
499 ; SSE2-NEXT: andl $7, %edi
500 ; SSE2-NEXT: movd %edi, %xmm0
501 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
502 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
503 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
504 ; SSE2-NEXT: pcmpgtw %xmm0, %xmm1
505 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
506 ; SSE2-NEXT: pxor %xmm1, %xmm0
509 ; SSE41-LABEL: PR47448_uge:
511 ; SSE41-NEXT: andl $7, %edi
512 ; SSE41-NEXT: movd %edi, %xmm0
513 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
514 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
515 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7]
516 ; SSE41-NEXT: pmaxuw %xmm1, %xmm0
517 ; SSE41-NEXT: pcmpeqw %xmm1, %xmm0
520 ; AVX1-LABEL: PR47448_uge:
522 ; AVX1-NEXT: andl $7, %edi
523 ; AVX1-NEXT: vmovd %edi, %xmm0
524 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
525 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
526 ; AVX1-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
527 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
530 ; AVX2-LABEL: PR47448_uge:
532 ; AVX2-NEXT: andl $7, %edi
533 ; AVX2-NEXT: vmovd %edi, %xmm0
534 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
535 ; AVX2-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
536 ; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; Insert %2 into lane 0, splat lane 0 to all eight lanes, compare, sign-extend.
539 %3 = insertelement <8 x i16> undef, i16 %2, i32 0
540 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
541 %5 = icmp uge <8 x i16> %4, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
542 %6 = sext <8 x i1> %5 to <8 x i16>
; PR47448_ugt - splats a scalar across <8 x i16>, compares it unsigned
; greater-than against the constant vector <0,1,2,3,4,5,6,7>, and
; sign-extends the i1 result lanes to i16.
; Expected output on every run line: the scalar is masked with 'andl $7',
; broadcast (pshuflw+pshufd, or vpbroadcastw on the AVX2 run), and compared
; with a single signed pcmpgtw/vpcmpgtw against a constant-pool operand; no
; sign-bit flip and no inversion.
; NOTE(review): the definition of %2 is not visible in this excerpt; the
; expected 'andl $7, %edi' suggests it is %0 masked to the range [0,7] -
; confirm.
546 define <8 x i16> @PR47448_ugt(i16 signext %0) {
547 ; SSE-LABEL: PR47448_ugt:
549 ; SSE-NEXT: andl $7, %edi
550 ; SSE-NEXT: movd %edi, %xmm0
551 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
552 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
553 ; SSE-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
556 ; AVX1-LABEL: PR47448_ugt:
558 ; AVX1-NEXT: andl $7, %edi
559 ; AVX1-NEXT: vmovd %edi, %xmm0
560 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
561 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
562 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
565 ; AVX2-LABEL: PR47448_ugt:
567 ; AVX2-NEXT: andl $7, %edi
568 ; AVX2-NEXT: vmovd %edi, %xmm0
569 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
570 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; Insert %2 into lane 0, splat lane 0 to all eight lanes, compare, sign-extend.
573 %3 = insertelement <8 x i16> undef, i16 %2, i32 0
574 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
575 %5 = icmp ugt <8 x i16> %4, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
576 %6 = sext <8 x i1> %5 to <8 x i16>
580 ; Recognise the knownbits from X86ISD::AND in previous block.
; PR54171 - branches on whether %sub is zero; on the non-zero path it
; truncates %sub to i32, splats it across <8 x i32>, performs two unsigned
; greater-than compares against the constant vectors <0,0,1,1,2,2,3,3> and
; <4,4,5,5,6,6,7,7>, sign-extends each result to <8 x i32>, and stores them
; through %mask0 and %mask1 (align 32).
; Expected output: 'andq $7, %rdx' followed by 'je' to the if.end block, then
; the splat and plain signed pcmpgtd/vpcmpgtd against constant-pool operands
; with no sign-bit flip. The SSE and AVX1 runs handle each 256-bit vector as
; two 128-bit halves (stores at offsets 0 and 16); the AVX2 run uses ymm
; registers and ends with vzeroupper.
; NOTE(review): the definition of %sub is not visible in this excerpt; the
; expected 'andq $7, %rdx' suggests it is %i masked to the range [0,7] -
; confirm.
; NOTE(review): the insertelement below starts from undef while the
; shufflevector uses poison for its second operand - consider whether these
; should be unified.
581 define void @PR54171(ptr %mask0, ptr %mask1, i64 %i) {
582 ; SSE-LABEL: PR54171:
583 ; SSE: # %bb.0: # %entry
584 ; SSE-NEXT: andq $7, %rdx
585 ; SSE-NEXT: je .LBB18_2
586 ; SSE-NEXT: # %bb.1: # %if.then
587 ; SSE-NEXT: movd %edx, %xmm0
588 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
589 ; SSE-NEXT: movdqa %xmm0, %xmm1
590 ; SSE-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
591 ; SSE-NEXT: movdqa %xmm0, %xmm2
592 ; SSE-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
593 ; SSE-NEXT: movdqa %xmm2, (%rdi)
594 ; SSE-NEXT: movdqa %xmm1, 16(%rdi)
595 ; SSE-NEXT: movdqa %xmm0, %xmm1
596 ; SSE-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
597 ; SSE-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
598 ; SSE-NEXT: movdqa %xmm0, (%rsi)
599 ; SSE-NEXT: movdqa %xmm1, 16(%rsi)
600 ; SSE-NEXT: .LBB18_2: # %if.end
603 ; AVX1-LABEL: PR54171:
604 ; AVX1: # %bb.0: # %entry
605 ; AVX1-NEXT: andq $7, %rdx
606 ; AVX1-NEXT: je .LBB18_2
607 ; AVX1-NEXT: # %bb.1: # %if.then
608 ; AVX1-NEXT: vmovd %edx, %xmm0
609 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
610 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
611 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
612 ; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
613 ; AVX1-NEXT: vmovdqa %xmm1, 16(%rdi)
614 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
615 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
616 ; AVX1-NEXT: vmovdqa %xmm0, (%rsi)
617 ; AVX1-NEXT: vmovdqa %xmm1, 16(%rsi)
618 ; AVX1-NEXT: .LBB18_2: # %if.end
621 ; AVX2-LABEL: PR54171:
622 ; AVX2: # %bb.0: # %entry
623 ; AVX2-NEXT: andq $7, %rdx
624 ; AVX2-NEXT: je .LBB18_2
625 ; AVX2-NEXT: # %bb.1: # %if.then
626 ; AVX2-NEXT: vmovd %edx, %xmm0
627 ; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
628 ; AVX2-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
629 ; AVX2-NEXT: vmovdqa %ymm1, (%rdi)
630 ; AVX2-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
631 ; AVX2-NEXT: vmovdqa %ymm0, (%rsi)
632 ; AVX2-NEXT: .LBB18_2: # %if.end
633 ; AVX2-NEXT: vzeroupper
; Skip all vector work when %sub is zero.
637 %cmp.not = icmp eq i64 %sub, 0
638 br i1 %cmp.not, label %if.end, label %if.then
; Narrow %sub to 32 bits and splat it across all eight lanes.
641 %conv = trunc i64 %sub to i32
642 %vecinit.i.i = insertelement <8 x i32> undef, i32 %conv, i64 0
643 %vecinit7.i.i = shufflevector <8 x i32> %vecinit.i.i, <8 x i32> poison, <8 x i32> zeroinitializer
; First mask - splat > <0,0,1,1,2,2,3,3>, sign-extended and stored to %mask0.
644 %cmp.i = icmp ugt <8 x i32> %vecinit7.i.i, <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
645 %sext.i = sext <8 x i1> %cmp.i to <8 x i32>
646 store <8 x i32> %sext.i, ptr %mask0, align 32
; Second mask - splat > <4,4,5,5,6,6,7,7>, sign-extended and stored to %mask1.
647 %cmp.i18 = icmp ugt <8 x i32> %vecinit7.i.i, <i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
648 %sext.i19 = sext <8 x i1> %cmp.i18 to <8 x i32>
649 store <8 x i32> %sext.i19, ptr %mask1, align 32