; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi | FileCheck %s --check-prefixes=X86,X86-SLOW-BEXTR
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi,+bmi2 | FileCheck %s --check-prefixes=X86,X86-SLOW-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=X64,X64-SLOW-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=X64,X64-SLOW-BEXTR
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi,+fast-bextr | FileCheck %s --check-prefixes=X86,X86-FAST-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=X64,X64-FAST-BEXTR
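
; The BMI instructions exercised below compute, in C-like terms:
;   andn(x, y)  = ~x & y
;   bextr(x, c) = (x >> (c & 0xff)) & ((1 << ((c >> 8) & 0xff)) - 1)
;   blsi(x)     = x & -x        (isolate lowest set bit)
;   blsmsk(x)   = x ^ (x - 1)   (mask up to lowest set bit)
;   blsr(x)     = x & (x - 1)   (clear lowest set bit)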
define i32 @andn32(i32 %x, i32 %y) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X64-NEXT: andnl %esi, %edi, %eax
  %tmp1 = xor i32 %x, -1
  %tmp2 = and i32 %y, %tmp1

define i32 @andn32_load(i32 %x, ptr %y) {
; X86-LABEL: andn32_load:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andnl (%eax), %ecx, %eax
; X64-LABEL: andn32_load:
; X64-NEXT: andnl (%rsi), %edi, %eax
  %y1 = load i32, ptr %y
  %tmp1 = xor i32 %x, -1
  %tmp2 = and i32 %y1, %tmp1

define i64 @andn64(i64 %x, i64 %y) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx
; X64-NEXT: andnq %rsi, %rdi, %rax
  %tmp1 = xor i64 %x, -1
  %tmp2 = and i64 %tmp1, %y

; Don't choose a 'test' if an 'andn' can be used.
define i1 @andn_cmp(i32 %x, i32 %y) {
; X86-LABEL: andn_cmp:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X64-LABEL: andn_cmp:
; X64-NEXT: andnl %esi, %edi, %eax
  %notx = xor i32 %x, -1
  %and = and i32 %notx, %y
  %cmp = icmp eq i32 %and, 0

; Recognize a disguised andn in the following 4 tests.
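; They rely on the identity (x & y) == y  <=>  (~x & y) == 0, which lets the
; and+cmp pair collapse into a single flag-setting 'andn'.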
define i1 @and_cmp1(i32 %x, i32 %y) {
; X86-LABEL: and_cmp1:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X64-LABEL: and_cmp1:
; X64-NEXT: andnl %esi, %edi, %eax
  %cmp = icmp eq i32 %and, %y

define i1 @and_cmp2(i32 %x, i32 %y) {
; X86-LABEL: and_cmp2:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: setne %al
; X64-LABEL: and_cmp2:
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: setne %al
  %and = and i32 %y, %x
  %cmp = icmp ne i32 %and, %y

define i1 @and_cmp3(i32 %x, i32 %y) {
; X86-LABEL: and_cmp3:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X64-LABEL: and_cmp3:
; X64-NEXT: andnl %esi, %edi, %eax
  %and = and i32 %x, %y
  %cmp = icmp eq i32 %y, %and

define i1 @and_cmp4(i32 %x, i32 %y) {
; X86-LABEL: and_cmp4:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: setne %al
; X64-LABEL: and_cmp4:
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: setne %al
  %and = and i32 %y, %x
  %cmp = icmp ne i32 %y, %and

; A mask and compare against a constant is OK for an 'andn' too,
; even though the BMI instruction doesn't have an immediate form.
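; The same identity applies: (x & 43) == 43  <=>  (~x & 43) == 0, so the
; inverted value can be tested against the immediate with not+test.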
define i1 @and_cmp_const(i32 %x) {
; X86-LABEL: and_cmp_const:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: notl %eax
; X86-NEXT: testb $43, %al
; X64-LABEL: and_cmp_const:
; X64-NEXT: notl %edi
; X64-NEXT: testb $43, %dil
  %and = and i32 %x, 43
  %cmp = icmp eq i32 %and, 43

; But don't use 'andn' if the mask is a power-of-two.
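; Here (x & (1 << y)) != (1 << y) just asks whether bit y of x is clear, so
; 'bt' + setae (CF clear) is cheaper than materializing the shifted mask.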
define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) {
; X86-LABEL: and_cmp_const_power_of_two:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: btl %ecx, %eax
; X86-NEXT: setae %al
; X64-LABEL: and_cmp_const_power_of_two:
; X64-NEXT: btl %esi, %edi
; X64-NEXT: setae %al
  %and = and i32 %x, %shl
  %cmp = icmp ne i32 %and, %shl

; Don't transform to 'andn' if there's another use of the 'and'.
define i32 @and_cmp_not_one_use(i32 %x) {
; X86-LABEL: and_cmp_not_one_use:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andl $37, %ecx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl $37, %ecx
; X86-NEXT: addl %ecx, %eax
; X64-LABEL: and_cmp_not_one_use:
; X64-NEXT: andl $37, %edi
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl $37, %edi
; X64-NEXT: addl %edi, %eax
  %and = and i32 %x, 37
  %cmp = icmp eq i32 %and, 37
  %ext = zext i1 %cmp to i32
  %add = add i32 %and, %ext

; Verify that we're not transforming invalid comparison predicates.
define i1 @not_an_andn1(i32 %x, i32 %y) {
; X86-LABEL: not_an_andn1:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andl %eax, %ecx
; X86-NEXT: cmpl %ecx, %eax
; X64-LABEL: not_an_andn1:
; X64-NEXT: andl %esi, %edi
; X64-NEXT: cmpl %edi, %esi
  %and = and i32 %x, %y
  %cmp = icmp sgt i32 %y, %and

define i1 @not_an_andn2(i32 %x, i32 %y) {
; X86-LABEL: not_an_andn2:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andl %eax, %ecx
; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: setbe %al
; X64-LABEL: not_an_andn2:
; X64-NEXT: andl %esi, %edi
; X64-NEXT: cmpl %edi, %esi
; X64-NEXT: setbe %al
  %and = and i32 %y, %x
  %cmp = icmp ule i32 %y, %and

; Don't choose a 'test' if an 'andn' can be used.
define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
; X86-LABEL: andn_cmp_swap_ops:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %ecx
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: orl %ecx, %eax
; X64-LABEL: andn_cmp_swap_ops:
; X64-NEXT: andnq %rsi, %rdi, %rax
  %notx = xor i64 %x, -1
  %and = and i64 %y, %notx
  %cmp = icmp eq i64 %and, 0

; Use a 'test' (not an 'and') because 'andn' only works for i32/i64.
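; ANDN is only encoded with 32- and 64-bit operands, so the i8 case inverts
; with 'not' and tests the bytes directly.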
define i1 @andn_cmp_i8(i8 %x, i8 %y) {
; X86-LABEL: andn_cmp_i8:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
; X64-LABEL: andn_cmp_i8:
; X64-NEXT: notb %sil
; X64-NEXT: testb %sil, %dil
  %noty = xor i8 %y, -1
  %and = and i8 %x, %noty
  %cmp = icmp eq i8 %and, 0

; PR48768 - 'andn' clears the overflow flag, so we don't need a separate 'test'.
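; 'andn' also sets SF and ZF from its result, so 'slt 1' (i.e. 'sle 0') can be
; answered with setle straight off the andn flags.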
define i1 @andn_cmp_i32_overflow(i32 %x, i32 %y) {
; X86-LABEL: andn_cmp_i32_overflow:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: setle %al
; X64-LABEL: andn_cmp_i32_overflow:
; X64-NEXT: andnl %edi, %esi, %eax
; X64-NEXT: setle %al
  %noty = xor i32 %y, -1
  %and = and i32 %x, %noty
  %cmp = icmp slt i32 %and, 1

declare i32 @llvm.x86.bmi.bextr.32(i32, i32)
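
; The second intrinsic operand is the BEXTR control word: bits 7:0 give the
; start bit and bits 15:8 the length, i.e. result = (x >> start) & ((1 << len) - 1).
; E.g. the 0xC04 control used below means start=4, len=12, which is why
; slow-bextr targets expand it to shrl $4 + andl $4095.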
define i32 @bextr32(i32 %x, i32 %y) {
; X86-LABEL: bextr32:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X64-LABEL: bextr32:
; X64-NEXT: bextrl %esi, %edi, %eax
  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y)

define i32 @bextr32_load(ptr %x, i32 %y) {
; X86-LABEL: bextr32_load:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: bextrl %eax, (%ecx), %eax
; X64-LABEL: bextr32_load:
; X64-NEXT: bextrl %esi, (%rdi), %eax
  %x1 = load i32, ptr %x
  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y)

define i32 @bextr32b(i32 %x) uwtable ssp {
; X86-SLOW-BEXTR-LABEL: bextr32b:
; X86-SLOW-BEXTR: # %bb.0:
; X86-SLOW-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-BEXTR-NEXT: shrl $4, %eax
; X86-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF
; X86-SLOW-BEXTR-NEXT: retl
; X64-SLOW-BEXTR-LABEL: bextr32b:
; X64-SLOW-BEXTR: # %bb.0:
; X64-SLOW-BEXTR-NEXT: movl %edi, %eax
; X64-SLOW-BEXTR-NEXT: shrl $4, %eax
; X64-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF
; X64-SLOW-BEXTR-NEXT: retq
; X86-FAST-BEXTR-LABEL: bextr32b:
; X86-FAST-BEXTR: # %bb.0:
; X86-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04
; X86-FAST-BEXTR-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-FAST-BEXTR-NEXT: retl
; X64-FAST-BEXTR-LABEL: bextr32b:
; X64-FAST-BEXTR: # %bb.0:
; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04
; X64-FAST-BEXTR-NEXT: bextrl %eax, %edi, %eax
; X64-FAST-BEXTR-NEXT: retq
  %2 = and i32 %1, 4095

; Make sure we still use the AH subreg trick to extract bits 15:8.
define i32 @bextr32_subreg(i32 %x) uwtable ssp {
; X86-LABEL: bextr32_subreg:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X64-LABEL: bextr32_subreg:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movzbl %ah, %eax

define i32 @bextr32b_load(ptr %x) uwtable ssp {
; X86-SLOW-BEXTR-LABEL: bextr32b_load:
; X86-SLOW-BEXTR: # %bb.0:
; X86-SLOW-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-BEXTR-NEXT: movl (%eax), %eax
; X86-SLOW-BEXTR-NEXT: shrl $4, %eax
; X86-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF
; X86-SLOW-BEXTR-NEXT: retl
; X64-SLOW-BEXTR-LABEL: bextr32b_load:
; X64-SLOW-BEXTR: # %bb.0:
; X64-SLOW-BEXTR-NEXT: movl (%rdi), %eax
; X64-SLOW-BEXTR-NEXT: shrl $4, %eax
; X64-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF
; X64-SLOW-BEXTR-NEXT: retq
; X86-FAST-BEXTR-LABEL: bextr32b_load:
; X86-FAST-BEXTR: # %bb.0:
; X86-FAST-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-BEXTR-NEXT: movl $3076, %ecx # imm = 0xC04
; X86-FAST-BEXTR-NEXT: bextrl %ecx, (%eax), %eax
; X86-FAST-BEXTR-NEXT: retl
; X64-FAST-BEXTR-LABEL: bextr32b_load:
; X64-FAST-BEXTR: # %bb.0:
; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04
; X64-FAST-BEXTR-NEXT: bextrl %eax, (%rdi), %eax
; X64-FAST-BEXTR-NEXT: retq
  %1 = load i32, ptr %x
  %3 = and i32 %2, 4095

define i32 @bextr32c(i32 %x, i16 zeroext %y) {
; X86-LABEL: bextr32c:
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X64-LABEL: bextr32c:
; X64-NEXT: bextrl %esi, %edi, %eax
  %tmp0 = sext i16 %y to i32
  %tmp1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %tmp0)

define i32 @non_bextr32(i32 %x) {
; X86-LABEL: non_bextr32:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $111, %eax
; X64-LABEL: non_bextr32:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shrl $2, %eax
; X64-NEXT: andl $111, %eax
  %shr = lshr i32 %x, 2
  %and = and i32 %shr, 111
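
; BLSI isolates the lowest set bit: blsi(x) = x & -x.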
define i32 @blsi32(i32 %x) {
; X86-NEXT: blsil {{[0-9]+}}(%esp), %eax
; X64-NEXT: blsil %edi, %eax
  %tmp2 = and i32 %x, %tmp

define i32 @blsi32_load(ptr %x) {
; X86-LABEL: blsi32_load:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: blsil (%eax), %eax
; X64-LABEL: blsi32_load:
; X64-NEXT: blsil (%rdi), %eax
  %x1 = load i32, ptr %x
  %tmp = sub i32 0, %x1
  %tmp2 = and i32 %x1, %tmp

define i32 @blsi32_z(i32 %a, i32 %b) nounwind {
; X86-LABEL: blsi32_z:
; X86-NEXT: blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT: jne .LBB25_2
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: .LBB25_2:
; X64-LABEL: blsi32_z:
; X64-NEXT: blsil %edi, %eax
; X64-NEXT: cmovel %esi, %eax
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %t1

define i32 @blsi32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsi32_z2:
; X86-NEXT: blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X64-LABEL: blsi32_z2:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsil %edi, %ecx
; X64-NEXT: cmovnel %edx, %eax
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c

; Inspired by PR48768, but using cmovcc instead of setcc. There should be
; no test instruction.
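; 'blsi' clears OF and sets SF/ZF, so 'sle 0' can feed cmovle/cmovg directly.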
define i32 @blsi32_sle(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsi32_sle:
; X86-NEXT: blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovlel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X64-LABEL: blsi32_sle:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsil %edi, %ecx
; X64-NEXT: cmovgl %edx, %eax
  %t1 = and i32 %t0, %a
  %t2 = icmp sle i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c

define i64 @blsi64(i64 %x) {
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: andl %esi, %edx
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-NEXT: blsiq %rdi, %rax
  %tmp2 = and i64 %tmp, %x

define i64 @blsi64_z(i64 %a, i64 %b) nounwind {
; X86-LABEL: blsi64_z:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: andl %esi, %edx
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: jne .LBB29_2
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: .LBB29_2:
; X86-NEXT: popl %esi
; X64-LABEL: blsi64_z:
; X64-NEXT: blsiq %rdi, %rax
; X64-NEXT: cmoveq %rsi, %rax
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %t1

define i64 @blsi64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsi64_z2:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: negl %esi
; X86-NEXT: sbbl %ecx, %edx
; X86-NEXT: andl %ecx, %edx
; X86-NEXT: andl %eax, %esi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: popl %esi
; X64-LABEL: blsi64_z2:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsiq %rdi, %rcx
; X64-NEXT: cmovneq %rdx, %rax
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c

define i64 @blsi64_sle(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsi64_sle:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: negl %esi
; X86-NEXT: sbbl %ecx, %edx
; X86-NEXT: andl %ecx, %edx
; X86-NEXT: andl %eax, %esi
; X86-NEXT: cmpl $1, %esi
; X86-NEXT: sbbl $0, %edx
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovll %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: popl %esi
; X64-LABEL: blsi64_sle:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsiq %rdi, %rcx
; X64-NEXT: cmovgq %rdx, %rax
  %t1 = and i64 %t0, %a
  %t2 = icmp sle i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
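
; BLSMSK builds a mask up to and including the lowest set bit:
; blsmsk(x) = x ^ (x - 1).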
define i32 @blsmsk32(i32 %x) {
; X86-LABEL: blsmsk32:
; X86-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
; X64-LABEL: blsmsk32:
; X64-NEXT: blsmskl %edi, %eax
  %tmp2 = xor i32 %x, %tmp

define i32 @blsmsk32_load(ptr %x) {
; X86-LABEL: blsmsk32_load:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: blsmskl (%eax), %eax
; X64-LABEL: blsmsk32_load:
; X64-NEXT: blsmskl (%rdi), %eax
  %x1 = load i32, ptr %x
  %tmp = sub i32 %x1, 1
  %tmp2 = xor i32 %x1, %tmp

define i32 @blsmsk32_z(i32 %a, i32 %b) nounwind {
; X86-LABEL: blsmsk32_z:
; X86-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT: jne .LBB34_2
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: .LBB34_2:
; X64-LABEL: blsmsk32_z:
; X64-NEXT: blsmskl %edi, %eax
; X64-NEXT: cmovel %esi, %eax
  %t1 = xor i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %t1

define i32 @blsmsk32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsmsk32_z2:
; X86-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X64-LABEL: blsmsk32_z2:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsmskl %edi, %ecx
; X64-NEXT: cmovnel %edx, %eax
  %t1 = xor i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c

define i32 @blsmsk32_sle(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsmsk32_sle:
; X86-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovlel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X64-LABEL: blsmsk32_sle:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsmskl %edi, %ecx
; X64-NEXT: cmovgl %edx, %eax
  %t1 = xor i32 %t0, %a
  %t2 = icmp sle i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c

define i64 @blsmsk64(i64 %x) {
; X86-LABEL: blsmsk64:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: adcl $-1, %edx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: xorl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: blsmsk64:
; X64-NEXT: blsmskq %rdi, %rax
  %tmp2 = xor i64 %tmp, %x

define i64 @blsmsk64_z(i64 %a, i64 %b) nounwind {
; X86-LABEL: blsmsk64_z:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: adcl $-1, %edx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: xorl %esi, %edx
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: jne .LBB38_2
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: .LBB38_2:
; X86-NEXT: popl %esi
; X64-LABEL: blsmsk64_z:
; X64-NEXT: blsmskq %rdi, %rax
; X64-NEXT: cmoveq %rsi, %rax
  %t1 = xor i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %t1

define i64 @blsmsk64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsmsk64_z2:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: addl $-1, %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: adcl $-1, %esi
; X86-NEXT: xorl %eax, %edx
; X86-NEXT: xorl %ecx, %esi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: popl %esi
; X64-LABEL: blsmsk64_z2:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsmskq %rdi, %rcx
; X64-NEXT: cmovneq %rdx, %rax
  %t1 = xor i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c

define i64 @blsmsk64_sle(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsmsk64_sle:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: addl $-1, %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: adcl $-1, %esi
; X86-NEXT: xorl %ecx, %esi
; X86-NEXT: xorl %eax, %edx
; X86-NEXT: cmpl $1, %edx
; X86-NEXT: sbbl $0, %esi
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovll %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: popl %esi
; X64-LABEL: blsmsk64_sle:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsmskq %rdi, %rcx
; X64-NEXT: cmovgq %rdx, %rax
  %t1 = xor i64 %t0, %a
  %t2 = icmp sle i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
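
; BLSR clears the lowest set bit: blsr(x) = x & (x - 1).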
define i32 @blsr32(i32 %x) {
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
; X64-NEXT: blsrl %edi, %eax
  %tmp2 = and i32 %x, %tmp

define i32 @blsr32_load(ptr %x) {
; X86-LABEL: blsr32_load:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: blsrl (%eax), %eax
; X64-LABEL: blsr32_load:
; X64-NEXT: blsrl (%rdi), %eax
  %x1 = load i32, ptr %x
  %tmp = sub i32 %x1, 1
  %tmp2 = and i32 %x1, %tmp

define i32 @blsr32_z(i32 %a, i32 %b) nounwind {
; X86-LABEL: blsr32_z:
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: jne .LBB43_2
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: .LBB43_2:
; X64-LABEL: blsr32_z:
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: cmovel %esi, %eax
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %t1

define i32 @blsr32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsr32_z2:
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X64-LABEL: blsr32_z2:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsrl %edi, %ecx
; X64-NEXT: cmovnel %edx, %eax
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c

define i32 @blsr32_sle(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsr32_sle:
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovlel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X64-LABEL: blsr32_sle:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsrl %edi, %ecx
; X64-NEXT: cmovgl %edx, %eax
  %t1 = and i32 %t0, %a
  %t2 = icmp sle i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c

define i64 @blsr64(i64 %x) {
; X86-LABEL: blsr64:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: adcl $-1, %edx
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: andl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: blsr64:
; X64-NEXT: blsrq %rdi, %rax
  %tmp = sub i64 %x, 1
  %tmp2 = and i64 %tmp, %x

define i64 @blsr64_z(i64 %a, i64 %b) nounwind {
; X86-LABEL: blsr64_z:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: adcl $-1, %edx
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: andl %esi, %edx
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: jne .LBB47_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: .LBB47_2:
; X86-NEXT: popl %esi
; X64-LABEL: blsr64_z:
; X64-NEXT: blsrq %rdi, %rax
; X64-NEXT: cmoveq %rsi, %rax
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %t1

define i64 @blsr64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsr64_z2:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: addl $-1, %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: adcl $-1, %esi
; X86-NEXT: andl %eax, %edx
; X86-NEXT: andl %ecx, %esi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: popl %esi
; X64-LABEL: blsr64_z2:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsrq %rdi, %rcx
; X64-NEXT: cmovneq %rdx, %rax
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c

define i64 @blsr64_sle(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsr64_sle:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: addl $-1, %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: adcl $-1, %esi
; X86-NEXT: andl %ecx, %esi
; X86-NEXT: andl %eax, %edx
; X86-NEXT: cmpl $1, %edx
; X86-NEXT: sbbl $0, %esi
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovll %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: popl %esi
; X64-LABEL: blsr64_sle:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsrq %rdi, %rcx
; X64-NEXT: cmovgq %rdx, %rax
  %t1 = and i64 %t0, %a
  %t2 = icmp sle i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c

; PR35792 - https://bugs.llvm.org/show_bug.cgi?id=35792
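; %a1 zeroes everything above bit 15, so only the low 16 bits of %a2 matter,
; and x + 65535 == x - 1 (mod 2^16). The result is (x & (x - 1)) & 65535,
; which is exactly blsr + movzwl.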
define i64 @blsr_disguised_constant(i64 %x) {
; X86-LABEL: blsr_disguised_constant:
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: xorl %edx, %edx
; X64-LABEL: blsr_disguised_constant:
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: movzwl %ax, %eax
  %a1 = and i64 %x, 65535
  %a2 = add i64 %x, 65535
  %r = and i64 %a1, %a2

; The add here used to get shrunk, but the and did not, thus hiding the blsr
; pattern. We now use the fact that the shift leaves the upper bits zero, so
; the and result has zeros in the upper bits too and can be narrowed as well.
define i64 @blsr_disguised_shrunk_add(i64 %x) {
; X86-LABEL: blsr_disguised_shrunk_add:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl $16, %eax
; X86-NEXT: blsrl %eax, %eax
; X86-NEXT: xorl %edx, %edx
; X64-LABEL: blsr_disguised_shrunk_add:
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: blsrl %edi, %eax
  %a = lshr i64 %x, 48
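
; PR40060: 'bextr' only reliably defines ZF, so checking the sign of its
; result still needs an explicit 'test' before the jns.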
define void @pr40060(i32, i32) {
; X86-LABEL: pr40060:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
; X86-NEXT: jns bar # TAILCALL
; X86-NEXT: # %bb.1:
; X64-LABEL: pr40060:
; X64-NEXT: bextrl %esi, %edi, %eax
; X64-NEXT: testl %eax, %eax
; X64-NEXT: jns bar # TAILCALL
; X64-NEXT: # %bb.1:
  %3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %0, i32 %1)
  %4 = icmp sgt i32 %3, -1
  br i1 %4, label %5, label %6
  tail call void @bar()

define i32 @blsr32_branch(i32 %x) {
; X86-LABEL: blsr32_branch:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %esi
; X86-NEXT: jne .LBB53_2
; X86-NEXT: # %bb.1:
; X86-NEXT: calll bar
; X86-NEXT: .LBB53_2:
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: blsr32_branch:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: blsrl %edi, %ebx
; X64-NEXT: jne .LBB53_2
; X64-NEXT: # %bb.1:
; X64-NEXT: callq bar
; X64-NEXT: .LBB53_2:
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
  %tmp = sub i32 %x, 1
  %tmp2 = and i32 %x, %tmp
  %cmp = icmp eq i32 %tmp2, 0
  br i1 %cmp, label %1, label %2
  tail call void @bar()

define i64 @blsr64_branch(i64 %x) {
; X86-LABEL: blsr64_branch:
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: .cfi_offset %esi, -12
; X86-NEXT: .cfi_offset %edi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: addl $-1, %esi
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: adcl $-1, %edi
; X86-NEXT: andl %eax, %esi
; X86-NEXT: andl %ecx, %edi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: orl %edi, %eax
; X86-NEXT: jne .LBB54_2
; X86-NEXT: # %bb.1:
; X86-NEXT: calll bar
; X86-NEXT: .LBB54_2:
; X86-NEXT: movl %esi, %eax
; X86-NEXT: movl %edi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: blsr64_branch:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: blsrq %rdi, %rbx
; X64-NEXT: jne .LBB54_2
; X64-NEXT: # %bb.1:
; X64-NEXT: callq bar
; X64-NEXT: .LBB54_2:
; X64-NEXT: movq %rbx, %rax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
  %tmp = sub i64 %x, 1
  %tmp2 = and i64 %x, %tmp
  %cmp = icmp eq i64 %tmp2, 0
  br i1 %cmp, label %1, label %2
  tail call void @bar()

define i32 @blsi32_branch(i32 %x) {
; X86-LABEL: blsi32_branch:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: blsil {{[0-9]+}}(%esp), %esi
; X86-NEXT: jne .LBB55_2
; X86-NEXT: # %bb.1:
; X86-NEXT: calll bar
; X86-NEXT: .LBB55_2:
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: blsi32_branch:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: blsil %edi, %ebx
; X64-NEXT: jne .LBB55_2
; X64-NEXT: # %bb.1:
; X64-NEXT: callq bar
; X64-NEXT: .LBB55_2:
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
  %tmp = sub i32 0, %x
  %tmp2 = and i32 %x, %tmp
  %cmp = icmp eq i32 %tmp2, 0
  br i1 %cmp, label %1, label %2
  tail call void @bar()

define i64 @blsi64_branch(i64 %x) {
; X86-LABEL: blsi64_branch:
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: .cfi_offset %esi, -12
; X86-NEXT: .cfi_offset %edi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: movl %eax, %edi
; X86-NEXT: negl %edi
; X86-NEXT: sbbl %ecx, %esi
; X86-NEXT: andl %ecx, %esi
; X86-NEXT: andl %eax, %edi
; X86-NEXT: movl %edi, %eax
; X86-NEXT: orl %esi, %eax
; X86-NEXT: jne .LBB56_2
; X86-NEXT: # %bb.1:
; X86-NEXT: calll bar
; X86-NEXT: .LBB56_2:
; X86-NEXT: movl %edi, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: blsi64_branch:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: blsiq %rdi, %rbx
; X64-NEXT: jne .LBB56_2
; X64-NEXT: # %bb.1:
; X64-NEXT: callq bar
; X64-NEXT: .LBB56_2:
; X64-NEXT: movq %rbx, %rax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
  %tmp = sub i64 0, %x
  %tmp2 = and i64 %x, %tmp
  %cmp = icmp eq i64 %tmp2, 0
  br i1 %cmp, label %1, label %2
  tail call void @bar()

declare dso_local void @bar()
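
; The pr42118 tests use the identity (x & -x) == x  <=>  (x & (x - 1)) == 0,
; so the compare against %x folds into the flags of a single 'blsr'.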
define void @pr42118_i32(i32 %x) {
; X86-LABEL: pr42118_i32:
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: je bar # TAILCALL
; X86-NEXT: # %bb.1:
; X64-LABEL: pr42118_i32:
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: je bar # TAILCALL
; X64-NEXT: # %bb.1:
  %tmp = sub i32 0, %x
  %tmp1 = and i32 %tmp, %x
  %cmp = icmp eq i32 %tmp1, %x
  br i1 %cmp, label %1, label %2
  tail call void @bar()

define void @pr42118_i64(i64 %x) {
; X86-LABEL: pr42118_i64:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: addl $-1, %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: adcl $-1, %esi
; X86-NEXT: andl %eax, %edx
; X86-NEXT: andl %ecx, %esi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: jne .LBB58_1
; X86-NEXT: # %bb.2:
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: jmp bar # TAILCALL
; X86-NEXT: .LBB58_1:
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: pr42118_i64:
; X64-NEXT: blsrq %rdi, %rax
; X64-NEXT: je bar # TAILCALL
; X64-NEXT: # %bb.1:
  %tmp = sub i64 0, %x
  %tmp1 = and i64 %tmp, %x
  %cmp = icmp eq i64 %tmp1, %x
  br i1 %cmp, label %1, label %2
  tail call void @bar()
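
; BLSI sets CF when its source is non-zero, so the 'x == 0' select can be
; lowered as cmovae (taken when CF is clear) off the blsi flags.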
define i32 @blsi_cflag_32(i32 %x, i32 %y) nounwind {
; X86-LABEL: blsi_cflag_32:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
; X86-NEXT: jne .LBB59_1
; X86-NEXT: # %bb.2:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: .LBB59_1:
; X86-NEXT: blsil %eax, %eax
; X64-LABEL: blsi_cflag_32:
; X64-NEXT: blsil %edi, %eax
; X64-NEXT: cmovael %esi, %eax
  %tobool = icmp eq i32 %x, 0
  %sub = sub nsw i32 0, %x
  %and = and i32 %sub, %x
  %cond = select i1 %tobool, i32 %y, i32 %and

define i64 @blsi_cflag_64(i64 %x, i64 %y) nounwind {
; X86-LABEL: blsi_cflag_64:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: orl %esi, %edi
; X86-NEXT: jne .LBB60_1
; X86-NEXT: # %bb.2:
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: jmp .LBB60_3
; X86-NEXT: .LBB60_1:
; X86-NEXT: andl %esi, %edx
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: .LBB60_3:
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X64-LABEL: blsi_cflag_64:
; X64-NEXT: blsiq %rdi, %rax
; X64-NEXT: cmovaeq %rsi, %rax
  %tobool = icmp eq i64 %x, 0
  %sub = sub nsw i64 0, %x
  %and = and i64 %sub, %x
  %cond = select i1 %tobool, i64 %y, i64 %and