1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefixes=CHECK,SSE2
3 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=CHECK,SSE41
; (sext (A slt B)) == 0 holds exactly when the compare is false, so the
; expected assembly extracts lane 1 of the pcmpgtd mask and inverts it (notl).
5 define i32 @test_eq_1(<4 x i32> %A, <4 x i32> %B) {
6 ; SSE2-LABEL: test_eq_1:
8 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
9 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
10 ; SSE2-NEXT: movd %xmm0, %eax
11 ; SSE2-NEXT: notl %eax
14 ; SSE41-LABEL: test_eq_1:
16 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
17 ; SSE41-NEXT: pextrd $1, %xmm1, %eax
18 ; SSE41-NEXT: notl %eax
20 %cmp = icmp slt <4 x i32> %A, %B
21 %sext = sext <4 x i1> %cmp to <4 x i32>
22 %cmp1 = icmp eq <4 x i32> %sext, zeroinitializer
23 %t0 = extractelement <4 x i1> %cmp1, i32 1
24 %t1 = sext i1 %t0 to i32
; (sext (A slt B)) != 0 is the original compare, so the expected assembly is
; just the pcmpgtd mask with lane 1 extracted (no inversion).
28 define i32 @test_ne_1(<4 x i32> %A, <4 x i32> %B) {
29 ; SSE2-LABEL: test_ne_1:
31 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
32 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
33 ; SSE2-NEXT: movd %xmm0, %eax
36 ; SSE41-LABEL: test_ne_1:
38 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
39 ; SSE41-NEXT: pextrd $1, %xmm1, %eax
41 %cmp = icmp slt <4 x i32> %A, %B
42 %sext = sext <4 x i1> %cmp to <4 x i32>
43 %cmp1 = icmp ne <4 x i32> %sext, zeroinitializer
44 %t0 = extractelement <4 x i1> %cmp1, i32 1
45 %t1 = sext i1 %t0 to i32
; Each lane of %sext is 0 or -1, so (sext sle 0) is always true; the expected
; assembly is the constant -1 on both runs.
49 define i32 @test_le_1(<4 x i32> %A, <4 x i32> %B) {
50 ; CHECK-LABEL: test_le_1:
52 ; CHECK-NEXT: movl $-1, %eax
54 %cmp = icmp slt <4 x i32> %A, %B
55 %sext = sext <4 x i1> %cmp to <4 x i32>
56 %cmp1 = icmp sle <4 x i32> %sext, zeroinitializer
57 %t0 = extractelement <4 x i1> %cmp1, i32 1
58 %t1 = sext i1 %t0 to i32
; Each lane of %sext is 0 or -1, so (sext sge 0) is true only when the lane is
; 0, i.e. when the compare is false; expected assembly inverts lane 1 (notl).
62 define i32 @test_ge_1(<4 x i32> %A, <4 x i32> %B) {
63 ; SSE2-LABEL: test_ge_1:
65 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
66 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
67 ; SSE2-NEXT: movd %xmm0, %eax
68 ; SSE2-NEXT: notl %eax
71 ; SSE41-LABEL: test_ge_1:
73 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
74 ; SSE41-NEXT: pextrd $1, %xmm1, %eax
75 ; SSE41-NEXT: notl %eax
77 %cmp = icmp slt <4 x i32> %A, %B
78 %sext = sext <4 x i1> %cmp to <4 x i32>
79 %cmp1 = icmp sge <4 x i32> %sext, zeroinitializer
80 %t0 = extractelement <4 x i1> %cmp1, i32 1
81 %t1 = sext i1 %t0 to i32
; (sext (A slt B)) slt 0 is true only when the lane is -1, i.e. it equals the
; original compare; expected assembly is the plain pcmpgtd mask, lane 1.
; NOTE(review): %t0 extracts from %cmp, not %cmp1, so %cmp1 is unused. The
; sibling tests extract from %cmp1. The two values are equivalent here, so the
; expected assembly should not depend on which is used - confirm intent.
85 define i32 @test_lt_1(<4 x i32> %A, <4 x i32> %B) {
86 ; SSE2-LABEL: test_lt_1:
88 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
89 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
90 ; SSE2-NEXT: movd %xmm0, %eax
93 ; SSE41-LABEL: test_lt_1:
95 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
96 ; SSE41-NEXT: pextrd $1, %xmm1, %eax
98 %cmp = icmp slt <4 x i32> %A, %B
99 %sext = sext <4 x i1> %cmp to <4 x i32>
100 %cmp1 = icmp slt <4 x i32> %sext, zeroinitializer
101 %t0 = extractelement <4 x i1> %cmp, i32 1
102 %t1 = sext i1 %t0 to i32
; Each lane of %sext is 0 or -1, so (sext sgt 0) is never true; the expected
; assembly is the constant 0 (xorl) on both runs.
106 define i32 @test_gt_1(<4 x i32> %A, <4 x i32> %B) {
107 ; CHECK-LABEL: test_gt_1:
109 ; CHECK-NEXT: xorl %eax, %eax
111 %cmp = icmp slt <4 x i32> %A, %B
112 %sext = sext <4 x i1> %cmp to <4 x i32>
113 %cmp1 = icmp sgt <4 x i32> %sext, zeroinitializer
114 %t0 = extractelement <4 x i1> %cmp1, i32 1
115 %t1 = sext i1 %t0 to i32
; Same pattern as the eq-with-zero test but with the first compare's operands
; swapped (B slt A): the result is the inverted pcmpgtd mask, lane 1 (notl).
119 define i32 @test_eq_2(<4 x i32> %A, <4 x i32> %B) {
120 ; SSE2-LABEL: test_eq_2:
122 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
123 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
124 ; SSE2-NEXT: movd %xmm0, %eax
125 ; SSE2-NEXT: notl %eax
128 ; SSE41-LABEL: test_eq_2:
130 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
131 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
132 ; SSE41-NEXT: notl %eax
134 %cmp = icmp slt <4 x i32> %B, %A
135 %sext = sext <4 x i1> %cmp to <4 x i32>
136 %cmp1 = icmp eq <4 x i32> %sext, zeroinitializer
137 %t0 = extractelement <4 x i1> %cmp1, i32 1
138 %t1 = sext i1 %t0 to i32
; (sext (B slt A)) != 0 is the original compare; expected assembly is the
; pcmpgtd mask with lane 1 extracted and no inversion.
142 define i32 @test_ne_2(<4 x i32> %A, <4 x i32> %B) {
143 ; SSE2-LABEL: test_ne_2:
145 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
146 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
147 ; SSE2-NEXT: movd %xmm0, %eax
150 ; SSE41-LABEL: test_ne_2:
152 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
153 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
155 %cmp = icmp slt <4 x i32> %B, %A
156 %sext = sext <4 x i1> %cmp to <4 x i32>
157 %cmp1 = icmp ne <4 x i32> %sext, zeroinitializer
158 %t0 = extractelement <4 x i1> %cmp1, i32 1
159 %t1 = sext i1 %t0 to i32
; Zero is on the left-hand side here: (0 sle sext) is true only when the lane
; is 0, i.e. when (B slt A) is false; expected assembly inverts lane 1 (notl).
163 define i32 @test_le_2(<4 x i32> %A, <4 x i32> %B) {
164 ; SSE2-LABEL: test_le_2:
166 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
167 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
168 ; SSE2-NEXT: movd %xmm0, %eax
169 ; SSE2-NEXT: notl %eax
172 ; SSE41-LABEL: test_le_2:
174 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
175 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
176 ; SSE41-NEXT: notl %eax
178 %cmp = icmp slt <4 x i32> %B, %A
179 %sext = sext <4 x i1> %cmp to <4 x i32>
180 %cmp1 = icmp sle <4 x i32> zeroinitializer, %sext
181 %t0 = extractelement <4 x i1> %cmp1, i32 1
182 %t1 = sext i1 %t0 to i32
; Zero is on the left-hand side: (0 sge sext) is always true because each lane
; of %sext is 0 or -1; the expected assembly is the constant -1.
186 define i32 @test_ge_2(<4 x i32> %A, <4 x i32> %B) {
187 ; CHECK-LABEL: test_ge_2:
189 ; CHECK-NEXT: movl $-1, %eax
191 %cmp = icmp slt <4 x i32> %B, %A
192 %sext = sext <4 x i1> %cmp to <4 x i32>
193 %cmp1 = icmp sge <4 x i32> zeroinitializer, %sext
194 %t0 = extractelement <4 x i1> %cmp1, i32 1
195 %t1 = sext i1 %t0 to i32
; Expected assembly is the plain (B slt A) pcmpgtd mask with lane 1 extracted.
; NOTE(review): %t0 extracts from %cmp, not %cmp1, so %cmp1 is unused and the
; (0 slt sext) compare is not actually exercised. Because each lane of %sext
; is 0 or -1, %cmp1 would be constant false; switching the extract to %cmp1
; would therefore require regenerating the assertions - confirm intent.
199 define i32 @test_lt_2(<4 x i32> %A, <4 x i32> %B) {
200 ; SSE2-LABEL: test_lt_2:
202 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
203 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
204 ; SSE2-NEXT: movd %xmm0, %eax
207 ; SSE41-LABEL: test_lt_2:
209 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
210 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
212 %cmp = icmp slt <4 x i32> %B, %A
213 %sext = sext <4 x i1> %cmp to <4 x i32>
214 %cmp1 = icmp slt <4 x i32> zeroinitializer, %sext
215 %t0 = extractelement <4 x i1> %cmp, i32 1
216 %t1 = sext i1 %t0 to i32
; Zero is on the left-hand side: (0 sgt sext) is true only when the lane is
; -1, i.e. it equals (B slt A); expected assembly is the plain pcmpgtd mask.
220 define i32 @test_gt_2(<4 x i32> %A, <4 x i32> %B) {
221 ; SSE2-LABEL: test_gt_2:
223 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
224 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
225 ; SSE2-NEXT: movd %xmm0, %eax
228 ; SSE41-LABEL: test_gt_2:
230 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
231 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
233 %cmp = icmp slt <4 x i32> %B, %A
234 %sext = sext <4 x i1> %cmp to <4 x i32>
235 %cmp1 = icmp sgt <4 x i32> zeroinitializer, %sext
236 %t0 = extractelement <4 x i1> %cmp1, i32 1
237 %t1 = sext i1 %t0 to i32
241 ; (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
242 ; Don't apply this combine to i1 - the constant 2 is out of range for a 1-bit type.
; %C3 (L5 != true) and %C8 (L5 == false) both compute the inverse of the
; loaded bit, so %C9 (%C3 ugt %C8) can never be true. The expected assembly
; inverts the loaded byte, masks it to one bit and stores it back via (%rdi).
243 define void @test_i1_uge(ptr%A2) {
244 ; CHECK-LABEL: test_i1_uge:
246 ; CHECK-NEXT: movzbl (%rdi), %eax
247 ; CHECK-NEXT: notb %al
248 ; CHECK-NEXT: andb $1, %al
249 ; CHECK-NEXT: movb %al, (%rdi)
251 %L5 = load i1, ptr %A2
252 %C3 = icmp ne i1 %L5, true
253 %C8 = icmp eq i1 %L5, false
254 %C9 = icmp ugt i1 %C3, %C8
255 %G3 = getelementptr i1, ptr %A2, i1 %C9
256 store i1 %C3, ptr %G3
260 ; This should not get folded to 0.
; i1 arithmetic built from truncated i8 arguments. %var7 truncates the odd
; constant 175 (low bit set), %var4 compares a constant with itself, and the
; udiv divides by that compare result. The expected assembly adds the two
; byte arguments, increments, and masks the result to its low bit.
262 define i64 @PR40657(i8 %var2, i8 %var9) {
263 ; CHECK-LABEL: PR40657:
265 ; CHECK-NEXT: addb %sil, %dil
266 ; CHECK-NEXT: incb %dil
267 ; CHECK-NEXT: movzbl %dil, %eax
268 ; CHECK-NEXT: andl $1, %eax
270 %var6 = trunc i8 %var9 to i1
271 %var7 = trunc i8 175 to i1
272 %var3 = sub nsw i1 %var6, %var7
273 %var4 = icmp eq i64 1114591064, 1114591064
274 %var1 = udiv i1 %var3, %var4
275 %var0 = trunc i8 %var2 to i1
276 %res = sub nsw nuw i1 %var0, %var1
277 %res.cast = zext i1 %res to i64
281 ; This should not get folded to 0.
; Chain of i1 subtractions (with mixed nsw/nuw flags) feeding an icmp ne whose
; result is subtracted again. The expected assembly keeps the byte-wide
; sub/xor chain and masks the final value to its low bit.
283 define i64 @PR40657_commute(i8 %var7, i8 %var8, i8 %var9) {
284 ; CHECK-LABEL: PR40657_commute:
286 ; CHECK-NEXT: subb %dil, %sil
287 ; CHECK-NEXT: subb %sil, %dl
288 ; CHECK-NEXT: subb %dl, %sil
289 ; CHECK-NEXT: xorb %dl, %sil
290 ; CHECK-NEXT: subb %sil, %dl
291 ; CHECK-NEXT: movzbl %dl, %eax
292 ; CHECK-NEXT: andl $1, %eax
294 %var4 = trunc i8 %var9 to i1
295 %var5 = trunc i8 %var8 to i1
296 %var6 = trunc i8 %var7 to i1
297 %var3 = sub nsw nuw i1 %var5, %var6
298 %var0 = sub nuw i1 %var4, %var3
299 %var2 = sub i1 %var3, %var0
300 %var1 = icmp ne i1 %var0, %var2
301 %res = sub nsw nuw i1 %var0, %var1
302 %res.cast = zext i1 %res to i64
; (x - y) == y feeding a select. The expected assembly doubles y (addl
; %esi, %esi), compares it with x and picks between %s1 and %s2 with cmovne.
306 define i64 @sub_to_shift_to_add(i32 %x, i32 %y, i64 %s1, i64 %s2) {
307 ; CHECK-LABEL: sub_to_shift_to_add:
309 ; CHECK-NEXT: movq %rdx, %rax
310 ; CHECK-NEXT: addl %esi, %esi
311 ; CHECK-NEXT: cmpl %esi, %edi
312 ; CHECK-NEXT: cmovneq %rcx, %rax
314 %sub = sub i32 %x, %y
315 %cmp = icmp eq i32 %sub, %y
316 %r = select i1 %cmp, i64 %s1, i64 %s2
; Vector form of (x - y) == y feeding a select. The expected assembly doubles
; y with paddd and compares with pcmpeqd; the select is pand/pandn/por on the
; SSE2 run and blendvps on the SSE4.1 run.
320 define <4 x float> @sub_to_shift_to_add_vec(<4 x i32> %x, <4 x i32> %y, <4 x float> %s1, <4 x float> %s2) {
321 ; SSE2-LABEL: sub_to_shift_to_add_vec:
323 ; SSE2-NEXT: paddd %xmm1, %xmm1
324 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
325 ; SSE2-NEXT: pand %xmm0, %xmm2
326 ; SSE2-NEXT: pandn %xmm3, %xmm0
327 ; SSE2-NEXT: por %xmm2, %xmm0
330 ; SSE41-LABEL: sub_to_shift_to_add_vec:
332 ; SSE41-NEXT: paddd %xmm1, %xmm1
333 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
334 ; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm3
335 ; SSE41-NEXT: movaps %xmm3, %xmm0
337 %sub = sub <4 x i32> %x, %y
338 %cmp = icmp eq <4 x i32> %sub, %y
339 %r = select <4 x i1> %cmp, <4 x float> %s1, <4 x float> %s2
; (42 - x) == x feeding a select. The expected assembly doubles x (addl
; %edi, %edi) and compares the result against the immediate 42.
343 define i64 @sub_constant_to_shift_to_add(i32 %x, i64 %s1, i64 %s2) {
344 ; CHECK-LABEL: sub_constant_to_shift_to_add:
346 ; CHECK-NEXT: movq %rsi, %rax
347 ; CHECK-NEXT: addl %edi, %edi
348 ; CHECK-NEXT: cmpl $42, %edi
349 ; CHECK-NEXT: cmovneq %rdx, %rax
351 %sub = sub i32 42, %x
352 %cmp = icmp eq i32 %sub, %x
353 %r = select i1 %cmp, i64 %s1, i64 %s2
; select (x olt 0.0), x, %neg. The expected assembly flips the sign of x with
; xorps against a -0.0 constant and combines the two values with minss.
; NOTE(review): the definition of %neg is not visible in this excerpt;
; presumably `fneg float %x` as in the sibling tests - confirm.
357 define float @olt(float %x) {
360 ; CHECK-NEXT: movaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
361 ; CHECK-NEXT: xorps %xmm0, %xmm1
362 ; CHECK-NEXT: minss %xmm1, %xmm0
364 %cmp = fcmp olt float %x, 0.0
366 %r = select i1 %cmp, float %x, float %neg
; select (x ogt 0.0), x, (fneg x). The expected assembly flips the sign of x
; with xorpd against a -0.0 constant and combines the two values with maxsd.
370 define double @ogt(double %x) {
373 ; CHECK-NEXT: movapd {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0]
374 ; CHECK-NEXT: xorpd %xmm0, %xmm1
375 ; CHECK-NEXT: maxsd %xmm1, %xmm0
377 %neg = fneg double %x
378 %cmp = fcmp ogt double %x, 0.0
379 %r = select i1 %cmp, double %x, double %neg
; Vector olt-against-zero with the select arms swapped (negated value chosen
; when the compare is true). The expected assembly uses xorps + maxps.
383 define <4 x float> @olt_swap(<4 x float> %x) {
384 ; CHECK-LABEL: olt_swap:
386 ; CHECK-NEXT: movaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
387 ; CHECK-NEXT: xorps %xmm0, %xmm1
388 ; CHECK-NEXT: maxps %xmm0, %xmm1
389 ; CHECK-NEXT: movaps %xmm1, %xmm0
391 %cmp = fcmp olt <4 x float> %x, zeroinitializer
392 %neg = fneg <4 x float> %x
393 %r = select <4 x i1> %cmp, <4 x float> %neg, <4 x float> %x
; Vector ogt-against-zero with the select arms swapped (negated value chosen
; when the compare is true). The expected assembly uses xorpd + minpd.
397 define <2 x double> @ogt_swap(<2 x double> %x) {
398 ; CHECK-LABEL: ogt_swap:
400 ; CHECK-NEXT: movapd {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0]
401 ; CHECK-NEXT: xorpd %xmm0, %xmm1
402 ; CHECK-NEXT: minpd %xmm0, %xmm1
403 ; CHECK-NEXT: movapd %xmm1, %xmm0
405 %neg = fneg <2 x double> %x
406 %cmp = fcmp ogt <2 x double> %x, zeroinitializer
407 %r = select <2 x i1> %cmp, <2 x double> %neg, <2 x double> %x
; Vector ole-against-zero selecting between (fneg x) and x. No min/max appears
; in the expected assembly: x is compared against its negation with cmpleps
; and the select is and/andn/or (SSE2 run) or blendvps (SSE4.1 run).
411 define <4 x float> @ole(<4 x float> %x) {
414 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
415 ; SSE2-NEXT: xorps %xmm0, %xmm2
416 ; SSE2-NEXT: movaps %xmm0, %xmm1
417 ; SSE2-NEXT: cmpleps %xmm2, %xmm1
418 ; SSE2-NEXT: andps %xmm1, %xmm2
419 ; SSE2-NEXT: andnps %xmm0, %xmm1
420 ; SSE2-NEXT: orps %xmm2, %xmm1
421 ; SSE2-NEXT: movaps %xmm1, %xmm0
426 ; SSE41-NEXT: movaps %xmm0, %xmm1
427 ; SSE41-NEXT: movaps {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
428 ; SSE41-NEXT: xorps %xmm0, %xmm2
429 ; SSE41-NEXT: cmpleps %xmm2, %xmm0
430 ; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
431 ; SSE41-NEXT: movaps %xmm1, %xmm0
433 %cmp = fcmp ole <4 x float> %x, zeroinitializer
434 %neg = fneg <4 x float> %x
435 %r = select <4 x i1> %cmp, <4 x float> %neg, <4 x float> %x
; Vector oge-against-zero selecting between (fneg x) and x. No min/max appears
; in the expected assembly: the negated value is compared against x with
; cmplepd and the select is and/andn/or (SSE2 run) or blendvpd (SSE4.1 run).
439 define <2 x double> @oge(<2 x double> %x) {
442 ; SSE2-NEXT: movapd {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0]
443 ; SSE2-NEXT: xorpd %xmm0, %xmm2
444 ; SSE2-NEXT: movapd %xmm2, %xmm1
445 ; SSE2-NEXT: cmplepd %xmm0, %xmm1
446 ; SSE2-NEXT: andpd %xmm1, %xmm2
447 ; SSE2-NEXT: andnpd %xmm0, %xmm1
448 ; SSE2-NEXT: orpd %xmm2, %xmm1
449 ; SSE2-NEXT: movapd %xmm1, %xmm0
454 ; SSE41-NEXT: movapd %xmm0, %xmm1
455 ; SSE41-NEXT: movapd {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0]
456 ; SSE41-NEXT: xorpd %xmm0, %xmm2
457 ; SSE41-NEXT: movapd %xmm2, %xmm0
458 ; SSE41-NEXT: cmplepd %xmm1, %xmm0
459 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
460 ; SSE41-NEXT: movapd %xmm1, %xmm0
462 %neg = fneg <2 x double> %x
463 %cmp = fcmp oge <2 x double> %x, zeroinitializer
464 %r = select <2 x i1> %cmp, <2 x double> %neg, <2 x double> %x
468 ; negative test - don't create an fneg to replace 0.0 operand
; The false arm of the select is an unrelated value %y (no fneg of %x), so the
; expected assembly still compares against a zeroed register (xorpd +
; cmpltsd) and selects with andpd/andnpd/orpd.
470 define double @ogt_no_fneg(double %x, double %y) {
471 ; CHECK-LABEL: ogt_no_fneg:
473 ; CHECK-NEXT: xorpd %xmm2, %xmm2
474 ; CHECK-NEXT: cmpltsd %xmm0, %xmm2
475 ; CHECK-NEXT: andpd %xmm2, %xmm0
476 ; CHECK-NEXT: andnpd %xmm1, %xmm2
477 ; CHECK-NEXT: orpd %xmm2, %xmm0
479 %cmp = fcmp ogt double %x, 0.0
480 %r = select i1 %cmp, double %x, double %y
484 ; negative test - can't change the setcc for non-zero constant
; The compare is against 1.0 rather than 0.0, so the expected assembly keeps
; an explicit cmpltsd against a loaded 1.0 and an and/andn/or select; the
; fneg is still emitted as an xorpd with a -0.0 constant.
486 define double @ogt_no_zero(double %x) {
487 ; CHECK-LABEL: ogt_no_zero:
489 ; CHECK-NEXT: movapd {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0]
490 ; CHECK-NEXT: xorpd %xmm0, %xmm1
491 ; CHECK-NEXT: movsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
492 ; CHECK-NEXT: cmpltsd %xmm0, %xmm2
493 ; CHECK-NEXT: andpd %xmm2, %xmm0
494 ; CHECK-NEXT: andnpd %xmm1, %xmm2
495 ; CHECK-NEXT: orpd %xmm2, %xmm0
497 %neg = fneg double %x
498 %cmp = fcmp ogt double %x, 1.0
499 %r = select i1 %cmp, double %x, double %neg
; Signed greater-than of %na and %nb, sign-extended to i64. The expected
; assembly compares the raw arguments with swapped operands (cmpq %rdi, %rsi
; + setg) and has no explicit not instruction.
; NOTE(review): the definitions of %na/%nb are not visible in this excerpt;
; presumably `xor i64 ..., -1` of %a/%b - confirm.
503 define i64 @cmp_sgt_not(i64 %a, i64 %b) {
504 ; CHECK-LABEL: cmp_sgt_not:
506 ; CHECK-NEXT: xorl %eax, %eax
507 ; CHECK-NEXT: cmpq %rdi, %rsi
508 ; CHECK-NEXT: setg %al
509 ; CHECK-NEXT: negq %rax
513 %c = icmp sgt i64 %na, %nb
514 %r = sext i1 %c to i64
; Signed greater-than of %na and the constant 42. The expected assembly
; compares the raw argument against -43 with setl.
; NOTE(review): the definition of %na is not visible in this excerpt;
; presumably `xor i64 %a, -1` - confirm.
518 define i64 @cmp_sgt_not_with_constant(i64 %a) {
519 ; CHECK-LABEL: cmp_sgt_not_with_constant:
521 ; CHECK-NEXT: xorl %eax, %eax
522 ; CHECK-NEXT: cmpq $-43, %rdi
523 ; CHECK-NEXT: setl %al
524 ; CHECK-NEXT: negq %rax
527 %c = icmp sgt i64 %na, 42
528 %r = sext i1 %c to i64
; Both vector operands are inverted (xor with all-ones) before a signed
; greater-than. The expected assembly is a single pcmpgtd with the operands
; swapped and no xor instructions.
532 define <4 x i32> @cmp_sgt_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
533 ; CHECK-LABEL: cmp_sgt_not_with_vec:
535 ; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
536 ; CHECK-NEXT: movdqa %xmm1, %xmm0
538 %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
539 %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
540 %c = icmp sgt <4 x i32> %na, %nb
541 %r = sext <4 x i1> %c to <4 x i32>
; Unsigned greater-than of %na and %nb, sign-extended to i64. The expected
; assembly inverts %rdi, adds %rsi to it and materializes the carry flag
; with sbbq.
; NOTE(review): the definitions of %na/%nb are not visible in this excerpt;
; presumably `xor i64 ..., -1` of %a/%b - confirm.
545 define i64 @cmp_ugt_not(i64 %a, i64 %b) {
546 ; CHECK-LABEL: cmp_ugt_not:
548 ; CHECK-NEXT: notq %rdi
549 ; CHECK-NEXT: xorl %eax, %eax
550 ; CHECK-NEXT: addq %rsi, %rdi
551 ; CHECK-NEXT: sbbq %rax, %rax
555 %c = icmp ugt i64 %na, %nb
556 %r = sext i1 %c to i64
; Unsigned greater-than of %na and the constant 42. The expected assembly
; compares the raw argument against -43 and materializes the carry flag with
; sbbq.
; NOTE(review): the definition of %na is not visible in this excerpt;
; presumably `xor i64 %a, -1` - confirm.
560 define i64 @cmp_ugt_not_with_constant(i64 %a) {
561 ; CHECK-LABEL: cmp_ugt_not_with_constant:
563 ; CHECK-NEXT: xorl %eax, %eax
564 ; CHECK-NEXT: cmpq $-43, %rdi
565 ; CHECK-NEXT: sbbq %rax, %rax
568 %c = icmp ugt i64 %na, 42
569 %r = sext i1 %c to i64
; Both vector operands are inverted (xor with all-ones) before an unsigned
; greater-than. SSE2 run: both inputs are xor'ed with 2147483648 (sign bit)
; and compared with a signed pcmpgtd. SSE4.1 run: pminud + pcmpeqd followed
; by an xor with all-ones.
573 define <4 x i32> @cmp_ugt_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
574 ; SSE2-LABEL: cmp_ugt_not_with_vec:
576 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
577 ; SSE2-NEXT: pxor %xmm2, %xmm0
578 ; SSE2-NEXT: pxor %xmm1, %xmm2
579 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
580 ; SSE2-NEXT: movdqa %xmm2, %xmm0
583 ; SSE41-LABEL: cmp_ugt_not_with_vec:
585 ; SSE41-NEXT: pminud %xmm1, %xmm0
586 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
587 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
588 ; SSE41-NEXT: pxor %xmm1, %xmm0
590 %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
591 %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
592 %c = icmp ugt <4 x i32> %na, %nb
593 %r = sext <4 x i1> %c to <4 x i32>
; Signed greater-or-equal of %na and %nb, sign-extended to i64. The expected
; assembly compares the raw arguments with swapped operands (cmpq %rdi, %rsi
; + setge) and has no explicit not instruction.
; NOTE(review): the definitions of %na/%nb are not visible in this excerpt;
; presumably `xor i64 ..., -1` of %a/%b - confirm.
597 define i64 @cmp_sge_not(i64 %a, i64 %b) {
598 ; CHECK-LABEL: cmp_sge_not:
600 ; CHECK-NEXT: xorl %eax, %eax
601 ; CHECK-NEXT: cmpq %rdi, %rsi
602 ; CHECK-NEXT: setge %al
603 ; CHECK-NEXT: negq %rax
607 %c = icmp sge i64 %na, %nb
608 %r = sext i1 %c to i64
; Signed greater-or-equal of %na and the constant 42. The expected assembly
; compares the raw argument against -42 with setl.
; NOTE(review): the definition of %na is not visible in this excerpt;
; presumably `xor i64 %a, -1` - confirm.
612 define i64 @cmp_sge_not_with_constant(i64 %a) {
613 ; CHECK-LABEL: cmp_sge_not_with_constant:
615 ; CHECK-NEXT: xorl %eax, %eax
616 ; CHECK-NEXT: cmpq $-42, %rdi
617 ; CHECK-NEXT: setl %al
618 ; CHECK-NEXT: negq %rax
621 %c = icmp sge i64 %na, 42
622 %r = sext i1 %c to i64
; Both vector operands are inverted (xor with all-ones) before the compare.
; NOTE(review): despite the "sge" in the name, the body uses `icmp uge`, and
; the expected assembly is the unsigned lowering (sign-bit flip + pcmpgtd +
; invert on the SSE2 run, pmaxud + pcmpeqd on the SSE4.1 run), identical to
; the uge vector test. The signed vector case is therefore not covered here;
; changing the predicate would require regenerating the assertions - confirm.
626 define <4 x i32> @cmp_sge_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
627 ; SSE2-LABEL: cmp_sge_not_with_vec:
629 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
630 ; SSE2-NEXT: pxor %xmm2, %xmm1
631 ; SSE2-NEXT: pxor %xmm2, %xmm0
632 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
633 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
634 ; SSE2-NEXT: pxor %xmm1, %xmm0
637 ; SSE41-LABEL: cmp_sge_not_with_vec:
639 ; SSE41-NEXT: pmaxud %xmm1, %xmm0
640 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
642 %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
643 %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
644 %c = icmp uge <4 x i32> %na, %nb
645 %r = sext <4 x i1> %c to <4 x i32>
; Unsigned greater-or-equal of %na and %nb, sign-extended to i64. The
; expected assembly compares the raw arguments (cmpq %rdi, %rsi) and folds
; the carry flag into the result with adcq $-1.
; NOTE(review): the definitions of %na/%nb are not visible in this excerpt;
; presumably `xor i64 ..., -1` of %a/%b - confirm.
649 define i64 @cmp_uge_not(i64 %a, i64 %b) {
650 ; CHECK-LABEL: cmp_uge_not:
652 ; CHECK-NEXT: xorl %eax, %eax
653 ; CHECK-NEXT: cmpq %rdi, %rsi
654 ; CHECK-NEXT: adcq $-1, %rax
658 %c = icmp uge i64 %na, %nb
659 %r = sext i1 %c to i64
; Unsigned greater-or-equal of %na and the constant 42. The expected assembly
; compares the raw argument against -42 and materializes the carry flag with
; sbbq.
; NOTE(review): the definition of %na is not visible in this excerpt;
; presumably `xor i64 %a, -1` - confirm.
663 define i64 @cmp_uge_not_with_constant(i64 %a) {
664 ; CHECK-LABEL: cmp_uge_not_with_constant:
666 ; CHECK-NEXT: xorl %eax, %eax
667 ; CHECK-NEXT: cmpq $-42, %rdi
668 ; CHECK-NEXT: sbbq %rax, %rax
671 %c = icmp uge i64 %na, 42
672 %r = sext i1 %c to i64
; Both vector operands are inverted (xor with all-ones) before an unsigned
; greater-or-equal. SSE2 run: sign-bit flip of both inputs, signed pcmpgtd,
; then an xor with all-ones. SSE4.1 run: pmaxud + pcmpeqd.
676 define <4 x i32> @cmp_uge_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
677 ; SSE2-LABEL: cmp_uge_not_with_vec:
679 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
680 ; SSE2-NEXT: pxor %xmm2, %xmm1
681 ; SSE2-NEXT: pxor %xmm2, %xmm0
682 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
683 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
684 ; SSE2-NEXT: pxor %xmm1, %xmm0
687 ; SSE41-LABEL: cmp_uge_not_with_vec:
689 ; SSE41-NEXT: pmaxud %xmm1, %xmm0
690 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
692 %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
693 %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
694 %c = icmp uge <4 x i32> %na, %nb
695 %r = sext <4 x i1> %c to <4 x i32>
; Signed less-or-equal of %na and %nb, sign-extended to i64. The expected
; assembly compares the raw arguments with swapped operands (cmpq %rdi, %rsi
; + setle) and has no explicit not instruction.
; NOTE(review): the definitions of %na/%nb are not visible in this excerpt;
; presumably `xor i64 ..., -1` of %a/%b - confirm.
699 define i64 @cmp_sle_not(i64 %a, i64 %b) {
700 ; CHECK-LABEL: cmp_sle_not:
702 ; CHECK-NEXT: xorl %eax, %eax
703 ; CHECK-NEXT: cmpq %rdi, %rsi
704 ; CHECK-NEXT: setle %al
705 ; CHECK-NEXT: negq %rax
709 %c = icmp sle i64 %na, %nb
710 %r = sext i1 %c to i64
; Signed less-or-equal of %na and the constant 42. The expected assembly
; compares the raw argument against -43 with setge.
; NOTE(review): the definition of %na is not visible in this excerpt;
; presumably `xor i64 %a, -1` - confirm.
714 define i64 @cmp_sle_not_with_constant(i64 %a) {
715 ; CHECK-LABEL: cmp_sle_not_with_constant:
717 ; CHECK-NEXT: xorl %eax, %eax
718 ; CHECK-NEXT: cmpq $-43, %rdi
719 ; CHECK-NEXT: setge %al
720 ; CHECK-NEXT: negq %rax
723 %c = icmp sle i64 %na, 42
724 %r = sext i1 %c to i64
; Both vector operands are inverted (xor with all-ones) before a signed
; less-or-equal. The expected assembly is pcmpgtd on the raw inputs followed
; by an xor with all-ones (pcmpeqd of a register with itself + pxor).
728 define <4 x i32> @cmp_sle_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
729 ; CHECK-LABEL: cmp_sle_not_with_vec:
731 ; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
732 ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
733 ; CHECK-NEXT: pxor %xmm1, %xmm0
735 %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
736 %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
737 %c = icmp sle <4 x i32> %na, %nb
738 %r = sext <4 x i1> %c to <4 x i32>
; Signed less-than of %na and %nb, sign-extended to i64. The expected
; assembly compares the raw arguments with swapped operands (cmpq %rdi, %rsi
; + setl) and has no explicit not instruction.
; NOTE(review): the definitions of %na/%nb are not visible in this excerpt;
; presumably `xor i64 ..., -1` of %a/%b - confirm.
742 define i64 @cmp_slt_not(i64 %a, i64 %b) {
743 ; CHECK-LABEL: cmp_slt_not:
745 ; CHECK-NEXT: xorl %eax, %eax
746 ; CHECK-NEXT: cmpq %rdi, %rsi
747 ; CHECK-NEXT: setl %al
748 ; CHECK-NEXT: negq %rax
752 %c = icmp slt i64 %na, %nb
753 %r = sext i1 %c to i64
; Signed less-than of %na and the constant 42. The expected assembly compares
; the raw argument against -42 with setge.
; NOTE(review): the definition of %na is not visible in this excerpt;
; presumably `xor i64 %a, -1` - confirm.
757 define i64 @cmp_slt_not_with_constant(i64 %a) {
758 ; CHECK-LABEL: cmp_slt_not_with_constant:
760 ; CHECK-NEXT: xorl %eax, %eax
761 ; CHECK-NEXT: cmpq $-42, %rdi
762 ; CHECK-NEXT: setge %al
763 ; CHECK-NEXT: negq %rax
766 %c = icmp slt i64 %na, 42
767 %r = sext i1 %c to i64
; Both vector operands are inverted (xor with all-ones) before a signed
; less-than. The expected assembly is a single pcmpgtd on the raw inputs with
; no xor instructions.
771 define <4 x i32> @cmp_slt_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
772 ; CHECK-LABEL: cmp_slt_not_with_vec:
774 ; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
776 %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
777 %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
778 %c = icmp slt <4 x i32> %na, %nb
779 %r = sext <4 x i1> %c to <4 x i32>
; Unsigned less-than of %na and %nb, sign-extended to i64. The expected
; assembly inverts %rsi, adds %rdi to it and materializes the carry flag
; with sbbq.
; NOTE(review): the definitions of %na/%nb are not visible in this excerpt;
; presumably `xor i64 ..., -1` of %a/%b - confirm.
784 define i64 @cmp_ult_not(i64 %a, i64 %b) {
785 ; CHECK-LABEL: cmp_ult_not:
787 ; CHECK-NEXT: notq %rsi
788 ; CHECK-NEXT: xorl %eax, %eax
789 ; CHECK-NEXT: addq %rdi, %rsi
790 ; CHECK-NEXT: sbbq %rax, %rax
794 %c = icmp ult i64 %na, %nb
795 %r = sext i1 %c to i64
; Unsigned less-than of %na and the constant 42. The expected assembly adds
; 42 to the raw argument and materializes the carry flag with sbbq.
; NOTE(review): the definition of %na is not visible in this excerpt;
; presumably `xor i64 %a, -1` - confirm.
799 define i64 @cmp_ult_not_with_constant(i64 %a) {
800 ; CHECK-LABEL: cmp_ult_not_with_constant:
802 ; CHECK-NEXT: xorl %eax, %eax
803 ; CHECK-NEXT: addq $42, %rdi
804 ; CHECK-NEXT: sbbq %rax, %rax
807 %c = icmp ult i64 %na, 42
808 %r = sext i1 %c to i64
; Both vector operands are inverted (xor with all-ones) before an unsigned
; less-than. SSE2 run: sign-bit flip of both inputs and a signed pcmpgtd.
; SSE4.1 run: pmaxud + pcmpeqd followed by an xor with all-ones.
812 define <4 x i32> @cmp_ult_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
813 ; SSE2-LABEL: cmp_ult_not_with_vec:
815 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
816 ; SSE2-NEXT: pxor %xmm2, %xmm1
817 ; SSE2-NEXT: pxor %xmm2, %xmm0
818 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
821 ; SSE41-LABEL: cmp_ult_not_with_vec:
823 ; SSE41-NEXT: pmaxud %xmm1, %xmm0
824 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
825 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
826 ; SSE41-NEXT: pxor %xmm1, %xmm0
828 %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
829 %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
830 %c = icmp ult <4 x i32> %na, %nb
831 %r = sext <4 x i1> %c to <4 x i32>
; Unsigned less-or-equal of %na and %nb, sign-extended to i64. The expected
; assembly compares the raw arguments (cmpq %rsi, %rdi) and folds the carry
; flag into the result with adcq $-1.
; NOTE(review): the definitions of %na/%nb are not visible in this excerpt;
; presumably `xor i64 ..., -1` of %a/%b - confirm.
835 define i64 @cmp_ule_not(i64 %a, i64 %b) {
836 ; CHECK-LABEL: cmp_ule_not:
838 ; CHECK-NEXT: xorl %eax, %eax
839 ; CHECK-NEXT: cmpq %rsi, %rdi
840 ; CHECK-NEXT: adcq $-1, %rax
844 %c = icmp ule i64 %na, %nb
845 %r = sext i1 %c to i64
; Unsigned less-or-equal of %na and the constant 42. The expected assembly
; compares the raw argument against -43 and folds the carry flag into the
; result with adcq $-1.
; NOTE(review): the definition of %na is not visible in this excerpt;
; presumably `xor i64 %a, -1` - confirm.
849 define i64 @cmp_ule_not_with_constant(i64 %a) {
850 ; CHECK-LABEL: cmp_ule_not_with_constant:
852 ; CHECK-NEXT: xorl %eax, %eax
853 ; CHECK-NEXT: cmpq $-43, %rdi
854 ; CHECK-NEXT: adcq $-1, %rax
857 %c = icmp ule i64 %na, 42
858 %r = sext i1 %c to i64
; Both vector operands are inverted (xor with all-ones) before an unsigned
; less-or-equal. SSE2 run: sign-bit flip of both inputs, signed pcmpgtd, then
; an xor with all-ones. SSE4.1 run: pminud + pcmpeqd.
862 define <4 x i32> @cmp_ule_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
863 ; SSE2-LABEL: cmp_ule_not_with_vec:
865 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
866 ; SSE2-NEXT: pxor %xmm2, %xmm0
867 ; SSE2-NEXT: pxor %xmm1, %xmm2
868 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
869 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
870 ; SSE2-NEXT: pxor %xmm2, %xmm0
873 ; SSE41-LABEL: cmp_ule_not_with_vec:
875 ; SSE41-NEXT: pminud %xmm1, %xmm0
876 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
878 %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
879 %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
880 %c = icmp ule <4 x i32> %na, %nb
881 %r = sext <4 x i1> %c to <4 x i32>
; Equality of %na and %nb, sign-extended to i64. The expected assembly
; compares the raw arguments directly (cmpq + sete) with no not instruction.
; NOTE(review): the definitions of %na/%nb are not visible in this excerpt;
; presumably `xor i64 ..., -1` of %a/%b - confirm.
885 define i64 @cmp_eq_not(i64 %a, i64 %b) {
886 ; CHECK-LABEL: cmp_eq_not:
888 ; CHECK-NEXT: xorl %eax, %eax
889 ; CHECK-NEXT: cmpq %rsi, %rdi
890 ; CHECK-NEXT: sete %al
891 ; CHECK-NEXT: negq %rax
895 %c = icmp eq i64 %na, %nb
896 %r = sext i1 %c to i64
; Equality of %na and the constant 42. The expected assembly compares the raw
; argument against -43 with sete.
; NOTE(review): the definition of %na is not visible in this excerpt;
; presumably `xor i64 %a, -1` - confirm.
900 define i64 @cmp_eq_not_with_constant(i64 %a) {
901 ; CHECK-LABEL: cmp_eq_not_with_constant:
903 ; CHECK-NEXT: xorl %eax, %eax
904 ; CHECK-NEXT: cmpq $-43, %rdi
905 ; CHECK-NEXT: sete %al
906 ; CHECK-NEXT: negq %rax
909 %c = icmp eq i64 %na, 42
910 %r = sext i1 %c to i64
; Both vector operands are inverted (xor with all-ones) before an equality
; compare. The expected assembly is a single pcmpeqd on the raw inputs.
914 define <4 x i32> @cmp_eq_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
915 ; CHECK-LABEL: cmp_eq_not_with_vec:
917 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
919 %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
920 %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
921 %c = icmp eq <4 x i32> %na, %nb
922 %r = sext <4 x i1> %c to <4 x i32>
; Inequality of %na and %nb, sign-extended to i64. The expected assembly
; compares the raw arguments directly (cmpq + setne) with no not instruction.
; NOTE(review): the definitions of %na/%nb are not visible in this excerpt;
; presumably `xor i64 ..., -1` of %a/%b - confirm.
925 define i64 @cmp_ne_not(i64 %a, i64 %b) {
926 ; CHECK-LABEL: cmp_ne_not:
928 ; CHECK-NEXT: xorl %eax, %eax
929 ; CHECK-NEXT: cmpq %rsi, %rdi
930 ; CHECK-NEXT: setne %al
931 ; CHECK-NEXT: negq %rax
935 %c = icmp ne i64 %na, %nb
936 %r = sext i1 %c to i64
; Inequality of %na and the constant 42. The expected assembly compares the
; raw argument against -43 with setne.
; NOTE(review): the definition of %na is not visible in this excerpt;
; presumably `xor i64 %a, -1` - confirm.
940 define i64 @cmp_ne_not_with_constant(i64 %a) {
941 ; CHECK-LABEL: cmp_ne_not_with_constant:
943 ; CHECK-NEXT: xorl %eax, %eax
944 ; CHECK-NEXT: cmpq $-43, %rdi
945 ; CHECK-NEXT: setne %al
946 ; CHECK-NEXT: negq %rax
949 %c = icmp ne i64 %na, 42
950 %r = sext i1 %c to i64
; Both vector operands are inverted (xor with all-ones) before an inequality
; compare. The expected assembly is pcmpeqd on the raw inputs followed by an
; xor with all-ones.
954 define <4 x i32> @cmp_ne_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
955 ; CHECK-LABEL: cmp_ne_not_with_vec:
957 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
958 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
959 ; CHECK-NEXT: pxor %xmm1, %xmm0
961 %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
962 %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
963 %c = icmp ne <4 x i32> %na, %nb
964 %r = sext <4 x i1> %c to <4 x i32>
; Same compare as the scalar uge test but with the parameters declared in the
; order (%b, %a). The expected assembly compares with cmpq %rsi, %rdi and
; folds the carry flag into the result with adcq $-1.
; NOTE(review): the definitions of %na/%nb are not visible in this excerpt;
; presumably `xor i64 ..., -1` of %a/%b - confirm.
968 define i64 @cmp_uge_not_commute(i64 %b, i64 %a) {
969 ; CHECK-LABEL: cmp_uge_not_commute:
971 ; CHECK-NEXT: xorl %eax, %eax
972 ; CHECK-NEXT: cmpq %rsi, %rdi
973 ; CHECK-NEXT: adcq $-1, %rax
977 %c = icmp uge i64 %na, %nb
978 %r = sext i1 %c to i64
; Unsigned less-than with the constant 42 on the left-hand side. The expected
; assembly compares the raw argument against 43 and folds the carry flag into
; the result with adcq $-1.
; NOTE(review): the compare uses %a directly rather than a negated value, so
; despite the name no "not" operand feeds the icmp here. If a negated operand
; was intended, the assertions would need regenerating - confirm intent.
982 define i64 @cmp_ult_not_with_constant_commute(i64 %a) {
983 ; CHECK-LABEL: cmp_ult_not_with_constant_commute:
985 ; CHECK-NEXT: xorl %eax, %eax
986 ; CHECK-NEXT: cmpq $43, %rdi
987 ; CHECK-NEXT: adcq $-1, %rax
990 %c = icmp ult i64 42, %a
991 %r = sext i1 %c to i64
995 define <2 x i64> @cmp_uge_not_with_vec2xi64(<2 x i64> %a, <2 x i64> %b) {
996 ; CHECK-LABEL: cmp_uge_not_with_vec2xi64:
998 ; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
999 ; CHECK-NEXT: pxor %xmm2, %xmm1
1000 ; CHECK-NEXT: pxor %xmm2, %xmm0
1001 ; CHECK-NEXT: movdqa %xmm0, %xmm2
1002 ; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
1003 ; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
1004 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
1005 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1006 ; CHECK-NEXT: pand %xmm3, %xmm0
1007 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
1008 ; CHECK-NEXT: por %xmm0, %xmm1
1009 ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
1010 ; CHECK-NEXT: pxor %xmm1, %xmm0
1012 %na = xor <2 x i64> %a, <i64 -1, i64 -1>
1013 %nb = xor <2 x i64> %b, <i64 -1, i64 -1>
1014 %c = icmp uge <2 x i64> %na, %nb
1015 %r = sext <2 x i1> %c to <2 x i64>