1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=X32
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=X64
4 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=X32-CLZ
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=X64-CLZ
; Declarations of the intrinsics exercised by the tests below:
; count-trailing-zeros (cttz) and count-leading-zeros (ctlz) at i8, i16, i32
; and i64. Each takes the value to scan plus an i1 flag.
; NOTE(review): per the LLVM LangRef the i1 flag states whether a zero input
; may produce an undefined result - confirm against the LangRef in use.
7 declare i8 @llvm.cttz.i8(i8, i1)
8 declare i16 @llvm.cttz.i16(i16, i1)
9 declare i32 @llvm.cttz.i32(i32, i1)
10 declare i64 @llvm.cttz.i64(i64, i1)
12 declare i8 @llvm.ctlz.i8(i8, i1)
13 declare i16 @llvm.ctlz.i16(i16, i1)
14 declare i32 @llvm.ctlz.i32(i32, i1)
15 declare i64 @llvm.ctlz.i64(i64, i1)
; cttz on an i8 argument with the i1 flag set to true.
; Expected code: the argument is zero-extended with movzbl and the count is
; done in a 32-bit register - bsfl in the runs without -mattr, tzcntl in the
; +bmi,+lzcnt runs. No zero test or branch is expected.
17 define i8 @cttz_i8(i8 %x) {
20 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
21 ; X32-NEXT: bsfl %eax, %eax
22 ; X32-NEXT: # kill: def $al killed $al killed $eax
27 ; X64-NEXT: movzbl %dil, %eax
28 ; X64-NEXT: bsfl %eax, %eax
29 ; X64-NEXT: # kill: def $al killed $al killed $eax
32 ; X32-CLZ-LABEL: cttz_i8:
34 ; X32-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
35 ; X32-CLZ-NEXT: tzcntl %eax, %eax
36 ; X32-CLZ-NEXT: # kill: def $al killed $al killed $eax
39 ; X64-CLZ-LABEL: cttz_i8:
41 ; X64-CLZ-NEXT: movzbl %dil, %eax
42 ; X64-CLZ-NEXT: tzcntl %eax, %eax
43 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
45 %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
; cttz on an i16 argument with the i1 flag set to true.
; Expected code: a single 16-bit scan straight from the argument location
; (stack slot on i686, %di on x86_64) - bsfw without -mattr, tzcntw with
; +bmi,+lzcnt.
49 define i16 @cttz_i16(i16 %x) {
50 ; X32-LABEL: cttz_i16:
52 ; X32-NEXT: bsfw {{[0-9]+}}(%esp), %ax
55 ; X64-LABEL: cttz_i16:
57 ; X64-NEXT: bsfw %di, %ax
60 ; X32-CLZ-LABEL: cttz_i16:
62 ; X32-CLZ-NEXT: tzcntw {{[0-9]+}}(%esp), %ax
65 ; X64-CLZ-LABEL: cttz_i16:
67 ; X64-CLZ-NEXT: tzcntw %di, %ax
69 %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
; cttz on an i32 argument with the i1 flag set to true.
; Expected code: a single 32-bit scan straight from the argument location -
; bsfl without -mattr, tzcntl with +bmi,+lzcnt.
73 define i32 @cttz_i32(i32 %x) {
74 ; X32-LABEL: cttz_i32:
76 ; X32-NEXT: bsfl {{[0-9]+}}(%esp), %eax
79 ; X64-LABEL: cttz_i32:
81 ; X64-NEXT: bsfl %edi, %eax
84 ; X32-CLZ-LABEL: cttz_i32:
86 ; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
89 ; X64-CLZ-LABEL: cttz_i32:
91 ; X64-CLZ-NEXT: tzcntl %edi, %eax
93 %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
; cttz on an i64 argument with the i1 flag set to true.
; On x86_64 a single bsfq (or tzcntq with +bmi,+lzcnt) is expected.
; On i686 the value is handled as two 32-bit stack words: one word is loaded
; and tested; if it is non-zero it is scanned directly, otherwise the other
; word is scanned and 32 is added. %edx is cleared on both paths.
; NOTE(review): the word tested first is presumably the low half - the stack
; offsets are matched by regex, so this cannot be read from the checks.
97 define i64 @cttz_i64(i64 %x) {
98 ; X32-LABEL: cttz_i64:
100 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
101 ; X32-NEXT: testl %eax, %eax
102 ; X32-NEXT: jne .LBB3_1
104 ; X32-NEXT: bsfl {{[0-9]+}}(%esp), %eax
105 ; X32-NEXT: addl $32, %eax
106 ; X32-NEXT: xorl %edx, %edx
109 ; X32-NEXT: bsfl %eax, %eax
110 ; X32-NEXT: xorl %edx, %edx
113 ; X64-LABEL: cttz_i64:
115 ; X64-NEXT: bsfq %rdi, %rax
118 ; X32-CLZ-LABEL: cttz_i64:
120 ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
121 ; X32-CLZ-NEXT: testl %eax, %eax
122 ; X32-CLZ-NEXT: jne .LBB3_1
123 ; X32-CLZ-NEXT: # %bb.2:
124 ; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
125 ; X32-CLZ-NEXT: addl $32, %eax
126 ; X32-CLZ-NEXT: xorl %edx, %edx
128 ; X32-CLZ-NEXT: .LBB3_1:
129 ; X32-CLZ-NEXT: tzcntl %eax, %eax
130 ; X32-CLZ-NEXT: xorl %edx, %edx
133 ; X64-CLZ-LABEL: cttz_i64:
135 ; X64-CLZ-NEXT: tzcntq %rdi, %rax
137 %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
; ctlz on an i8 argument with the i1 flag set to true.
; Without -mattr: the argument is zero-extended, bsrl yields the index of the
; highest set bit, and xorl $7 turns that index (0..7) into a leading-zero
; count (7 - index).
; With +bmi,+lzcnt: lzcntl counts over 32 bits, so addl $-24 subtracts the 24
; extra leading zeros that come from widening the i8.
141 define i8 @ctlz_i8(i8 %x) {
142 ; X32-LABEL: ctlz_i8:
144 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
145 ; X32-NEXT: bsrl %eax, %eax
146 ; X32-NEXT: xorl $7, %eax
147 ; X32-NEXT: # kill: def $al killed $al killed $eax
150 ; X64-LABEL: ctlz_i8:
152 ; X64-NEXT: movzbl %dil, %eax
153 ; X64-NEXT: bsrl %eax, %eax
154 ; X64-NEXT: xorl $7, %eax
155 ; X64-NEXT: # kill: def $al killed $al killed $eax
158 ; X32-CLZ-LABEL: ctlz_i8:
160 ; X32-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
161 ; X32-CLZ-NEXT: lzcntl %eax, %eax
162 ; X32-CLZ-NEXT: addl $-24, %eax
163 ; X32-CLZ-NEXT: # kill: def $al killed $al killed $eax
166 ; X64-CLZ-LABEL: ctlz_i8:
168 ; X64-CLZ-NEXT: movzbl %dil, %eax
169 ; X64-CLZ-NEXT: lzcntl %eax, %eax
170 ; X64-CLZ-NEXT: addl $-24, %eax
171 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
173 %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
; ctlz on an i16 argument with the i1 flag set to true.
; Without -mattr: bsrw followed by xorl $15, which converts the bit index
; (0..15) into a leading-zero count.
; With +bmi,+lzcnt: a single lzcntw with no fix-up.
177 define i16 @ctlz_i16(i16 %x) {
178 ; X32-LABEL: ctlz_i16:
180 ; X32-NEXT: bsrw {{[0-9]+}}(%esp), %ax
181 ; X32-NEXT: xorl $15, %eax
182 ; X32-NEXT: # kill: def $ax killed $ax killed $eax
185 ; X64-LABEL: ctlz_i16:
187 ; X64-NEXT: bsrw %di, %ax
188 ; X64-NEXT: xorl $15, %eax
189 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
192 ; X32-CLZ-LABEL: ctlz_i16:
194 ; X32-CLZ-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
197 ; X64-CLZ-LABEL: ctlz_i16:
199 ; X64-CLZ-NEXT: lzcntw %di, %ax
201 %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
; ctlz on an i32 argument with the i1 flag set to true.
; Without -mattr: bsrl followed by xorl $31, which converts the bit index
; (0..31) into a leading-zero count.
; With +bmi,+lzcnt: a single lzcntl with no fix-up.
205 define i32 @ctlz_i32(i32 %x) {
206 ; X32-LABEL: ctlz_i32:
208 ; X32-NEXT: bsrl {{[0-9]+}}(%esp), %eax
209 ; X32-NEXT: xorl $31, %eax
212 ; X64-LABEL: ctlz_i32:
214 ; X64-NEXT: bsrl %edi, %eax
215 ; X64-NEXT: xorl $31, %eax
218 ; X32-CLZ-LABEL: ctlz_i32:
220 ; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
223 ; X64-CLZ-LABEL: ctlz_i32:
225 ; X64-CLZ-NEXT: lzcntl %edi, %eax
227 %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
; ctlz on an i64 argument with the i1 flag set to true.
; On x86_64: bsrq + xorq $63 without -mattr, or a single lzcntq with
; +bmi,+lzcnt.
; On i686 the value is handled as two 32-bit stack words: one word is loaded
; and tested; if it is non-zero its count is the result, otherwise the other
; word is counted and 32 is added. %edx is cleared on both paths.
; NOTE(review): the word tested first is presumably the high half - the stack
; offsets are matched by regex, so this cannot be read from the checks.
231 define i64 @ctlz_i64(i64 %x) {
232 ; X32-LABEL: ctlz_i64:
234 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
235 ; X32-NEXT: testl %eax, %eax
236 ; X32-NEXT: jne .LBB7_1
238 ; X32-NEXT: bsrl {{[0-9]+}}(%esp), %eax
239 ; X32-NEXT: xorl $31, %eax
240 ; X32-NEXT: addl $32, %eax
241 ; X32-NEXT: xorl %edx, %edx
244 ; X32-NEXT: bsrl %eax, %eax
245 ; X32-NEXT: xorl $31, %eax
246 ; X32-NEXT: xorl %edx, %edx
249 ; X64-LABEL: ctlz_i64:
251 ; X64-NEXT: bsrq %rdi, %rax
252 ; X64-NEXT: xorq $63, %rax
255 ; X32-CLZ-LABEL: ctlz_i64:
257 ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
258 ; X32-CLZ-NEXT: testl %eax, %eax
259 ; X32-CLZ-NEXT: jne .LBB7_1
260 ; X32-CLZ-NEXT: # %bb.2:
261 ; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
262 ; X32-CLZ-NEXT: addl $32, %eax
263 ; X32-CLZ-NEXT: xorl %edx, %edx
265 ; X32-CLZ-NEXT: .LBB7_1:
266 ; X32-CLZ-NEXT: lzcntl %eax, %eax
267 ; X32-CLZ-NEXT: xorl %edx, %edx
270 ; X64-CLZ-LABEL: ctlz_i64:
272 ; X64-CLZ-NEXT: lzcntq %rdi, %rax
274 %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 false )
278 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; ctlz on an i8 argument with the i1 flag set to false.
; Without -mattr: testb + je guards the scan; the non-zero path (%cond.false)
; runs movzbl / bsrl / xorl $7, and the other path loads the constant 8.
; With +bmi,+lzcnt: no branch is expected - the same movzbl / lzcntl /
; addl $-24 sequence as for the flag-true i8 test.
279 define i8 @ctlz_i8_zero_test(i8 %n) {
280 ; X32-LABEL: ctlz_i8_zero_test:
282 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
283 ; X32-NEXT: testb %al, %al
284 ; X32-NEXT: je .LBB8_1
285 ; X32-NEXT: # %bb.2: # %cond.false
286 ; X32-NEXT: movzbl %al, %eax
287 ; X32-NEXT: bsrl %eax, %eax
288 ; X32-NEXT: xorl $7, %eax
289 ; X32-NEXT: # kill: def $al killed $al killed $eax
292 ; X32-NEXT: movb $8, %al
293 ; X32-NEXT: # kill: def $al killed $al killed $eax
296 ; X64-LABEL: ctlz_i8_zero_test:
298 ; X64-NEXT: testb %dil, %dil
299 ; X64-NEXT: je .LBB8_1
300 ; X64-NEXT: # %bb.2: # %cond.false
301 ; X64-NEXT: movzbl %dil, %eax
302 ; X64-NEXT: bsrl %eax, %eax
303 ; X64-NEXT: xorl $7, %eax
304 ; X64-NEXT: # kill: def $al killed $al killed $eax
307 ; X64-NEXT: movb $8, %al
308 ; X64-NEXT: # kill: def $al killed $al killed $eax
311 ; X32-CLZ-LABEL: ctlz_i8_zero_test:
313 ; X32-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
314 ; X32-CLZ-NEXT: lzcntl %eax, %eax
315 ; X32-CLZ-NEXT: addl $-24, %eax
316 ; X32-CLZ-NEXT: # kill: def $al killed $al killed $eax
319 ; X64-CLZ-LABEL: ctlz_i8_zero_test:
321 ; X64-CLZ-NEXT: movzbl %dil, %eax
322 ; X64-CLZ-NEXT: lzcntl %eax, %eax
323 ; X64-CLZ-NEXT: addl $-24, %eax
324 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
326 %tmp1 = call i8 @llvm.ctlz.i8(i8 %n, i1 false)
330 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; ctlz on an i16 argument with the i1 flag set to false.
; Without -mattr: testw + je guards the scan; the non-zero path (%cond.false)
; runs bsrw / xorl $15, and the other path loads the constant 16.
; With +bmi,+lzcnt: a single lzcntw and no branch.
331 define i16 @ctlz_i16_zero_test(i16 %n) {
332 ; X32-LABEL: ctlz_i16_zero_test:
334 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
335 ; X32-NEXT: testw %ax, %ax
336 ; X32-NEXT: je .LBB9_1
337 ; X32-NEXT: # %bb.2: # %cond.false
338 ; X32-NEXT: bsrw %ax, %ax
339 ; X32-NEXT: xorl $15, %eax
340 ; X32-NEXT: # kill: def $ax killed $ax killed $eax
343 ; X32-NEXT: movw $16, %ax
344 ; X32-NEXT: # kill: def $ax killed $ax killed $eax
347 ; X64-LABEL: ctlz_i16_zero_test:
349 ; X64-NEXT: testw %di, %di
350 ; X64-NEXT: je .LBB9_1
351 ; X64-NEXT: # %bb.2: # %cond.false
352 ; X64-NEXT: bsrw %di, %ax
353 ; X64-NEXT: xorl $15, %eax
354 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
357 ; X64-NEXT: movw $16, %ax
358 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
361 ; X32-CLZ-LABEL: ctlz_i16_zero_test:
363 ; X32-CLZ-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
366 ; X64-CLZ-LABEL: ctlz_i16_zero_test:
368 ; X64-CLZ-NEXT: lzcntw %di, %ax
370 %tmp1 = call i16 @llvm.ctlz.i16(i16 %n, i1 false)
374 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; ctlz on an i32 argument with the i1 flag set to false.
; Without -mattr: testl + je guards the scan; the non-zero path (%cond.false)
; runs bsrl / xorl $31, and the je target loads the constant 32.
; With +bmi,+lzcnt: a single lzcntl and no branch.
375 define i32 @ctlz_i32_zero_test(i32 %n) {
376 ; X32-LABEL: ctlz_i32_zero_test:
378 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
379 ; X32-NEXT: testl %eax, %eax
380 ; X32-NEXT: je .LBB10_1
381 ; X32-NEXT: # %bb.2: # %cond.false
382 ; X32-NEXT: bsrl %eax, %eax
383 ; X32-NEXT: xorl $31, %eax
385 ; X32-NEXT: .LBB10_1:
386 ; X32-NEXT: movl $32, %eax
389 ; X64-LABEL: ctlz_i32_zero_test:
391 ; X64-NEXT: testl %edi, %edi
392 ; X64-NEXT: je .LBB10_1
393 ; X64-NEXT: # %bb.2: # %cond.false
394 ; X64-NEXT: bsrl %edi, %eax
395 ; X64-NEXT: xorl $31, %eax
397 ; X64-NEXT: .LBB10_1:
398 ; X64-NEXT: movl $32, %eax
401 ; X32-CLZ-LABEL: ctlz_i32_zero_test:
403 ; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
406 ; X64-CLZ-LABEL: ctlz_i32_zero_test:
408 ; X64-CLZ-NEXT: lzcntl %edi, %eax
410 %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
414 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; ctlz on an i64 argument with the i1 flag set to false.
; On x86_64 without -mattr: testq + je guards bsrq / xorq $63, and the je
; target loads the constant 64. With +bmi,+lzcnt: a single lzcntq.
; On i686 without -mattr: one word is scanned with bsrl while 63 is preloaded
; into %eax; the je after the movl skips the copy of the bsrl result and keeps
; the 63. If the other word is zero the result is (%eax xor 31) + 32, so an
; all-zero input gives (63 xor 31) + 32 = 64. If the other word is non-zero
; its own bsrl / xorl $31 is the result.
; On i686 with +bmi,+lzcnt: same shape as the flag-true i64 test (test one
; word, lzcntl it or lzcntl the other word and add 32).
415 define i64 @ctlz_i64_zero_test(i64 %n) {
416 ; X32-LABEL: ctlz_i64_zero_test:
418 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
419 ; X32-NEXT: bsrl {{[0-9]+}}(%esp), %edx
420 ; X32-NEXT: movl $63, %eax
421 ; X32-NEXT: je .LBB11_2
423 ; X32-NEXT: movl %edx, %eax
424 ; X32-NEXT: .LBB11_2:
425 ; X32-NEXT: testl %ecx, %ecx
426 ; X32-NEXT: jne .LBB11_3
428 ; X32-NEXT: xorl $31, %eax
429 ; X32-NEXT: addl $32, %eax
430 ; X32-NEXT: xorl %edx, %edx
432 ; X32-NEXT: .LBB11_3:
433 ; X32-NEXT: bsrl %ecx, %eax
434 ; X32-NEXT: xorl $31, %eax
435 ; X32-NEXT: xorl %edx, %edx
438 ; X64-LABEL: ctlz_i64_zero_test:
440 ; X64-NEXT: testq %rdi, %rdi
441 ; X64-NEXT: je .LBB11_1
442 ; X64-NEXT: # %bb.2: # %cond.false
443 ; X64-NEXT: bsrq %rdi, %rax
444 ; X64-NEXT: xorq $63, %rax
446 ; X64-NEXT: .LBB11_1:
447 ; X64-NEXT: movl $64, %eax
450 ; X32-CLZ-LABEL: ctlz_i64_zero_test:
452 ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
453 ; X32-CLZ-NEXT: testl %eax, %eax
454 ; X32-CLZ-NEXT: jne .LBB11_1
455 ; X32-CLZ-NEXT: # %bb.2:
456 ; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
457 ; X32-CLZ-NEXT: addl $32, %eax
458 ; X32-CLZ-NEXT: xorl %edx, %edx
460 ; X32-CLZ-NEXT: .LBB11_1:
461 ; X32-CLZ-NEXT: lzcntl %eax, %eax
462 ; X32-CLZ-NEXT: xorl %edx, %edx
465 ; X64-CLZ-LABEL: ctlz_i64_zero_test:
467 ; X64-CLZ-NEXT: lzcntq %rdi, %rax
469 %tmp1 = call i64 @llvm.ctlz.i64(i64 %n, i1 false)
473 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; cttz on an i8 argument with the i1 flag set to false.
; Without -mattr: testb + je guards the scan; the non-zero path (%cond.false)
; runs movzbl / bsfl, and the je target loads the constant 8.
; With +bmi,+lzcnt: no branch - orl $256 sets bit 8 of the zero-extended
; value, so tzcntl yields 8 when all eight input bits are zero.
474 define i8 @cttz_i8_zero_test(i8 %n) {
475 ; X32-LABEL: cttz_i8_zero_test:
477 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
478 ; X32-NEXT: testb %al, %al
479 ; X32-NEXT: je .LBB12_1
480 ; X32-NEXT: # %bb.2: # %cond.false
481 ; X32-NEXT: movzbl %al, %eax
482 ; X32-NEXT: bsfl %eax, %eax
483 ; X32-NEXT: # kill: def $al killed $al killed $eax
485 ; X32-NEXT: .LBB12_1:
486 ; X32-NEXT: movb $8, %al
487 ; X32-NEXT: # kill: def $al killed $al killed $eax
490 ; X64-LABEL: cttz_i8_zero_test:
492 ; X64-NEXT: testb %dil, %dil
493 ; X64-NEXT: je .LBB12_1
494 ; X64-NEXT: # %bb.2: # %cond.false
495 ; X64-NEXT: movzbl %dil, %eax
496 ; X64-NEXT: bsfl %eax, %eax
497 ; X64-NEXT: # kill: def $al killed $al killed $eax
499 ; X64-NEXT: .LBB12_1:
500 ; X64-NEXT: movb $8, %al
501 ; X64-NEXT: # kill: def $al killed $al killed $eax
504 ; X32-CLZ-LABEL: cttz_i8_zero_test:
506 ; X32-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
507 ; X32-CLZ-NEXT: orl $256, %eax # imm = 0x100
508 ; X32-CLZ-NEXT: tzcntl %eax, %eax
509 ; X32-CLZ-NEXT: # kill: def $al killed $al killed $eax
512 ; X64-CLZ-LABEL: cttz_i8_zero_test:
514 ; X64-CLZ-NEXT: movzbl %dil, %eax
515 ; X64-CLZ-NEXT: orl $256, %eax # imm = 0x100
516 ; X64-CLZ-NEXT: tzcntl %eax, %eax
517 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
519 %tmp1 = call i8 @llvm.cttz.i8(i8 %n, i1 false)
523 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; cttz on an i16 argument with the i1 flag set to false.
; Without -mattr: testw + je guards bsfw on the non-zero path (%cond.false),
; and the je target loads the constant 16.
; With +bmi,+lzcnt: a single tzcntw and no branch.
524 define i16 @cttz_i16_zero_test(i16 %n) {
525 ; X32-LABEL: cttz_i16_zero_test:
527 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
528 ; X32-NEXT: testw %ax, %ax
529 ; X32-NEXT: je .LBB13_1
530 ; X32-NEXT: # %bb.2: # %cond.false
531 ; X32-NEXT: bsfw %ax, %ax
533 ; X32-NEXT: .LBB13_1:
534 ; X32-NEXT: movw $16, %ax
537 ; X64-LABEL: cttz_i16_zero_test:
539 ; X64-NEXT: testw %di, %di
540 ; X64-NEXT: je .LBB13_1
541 ; X64-NEXT: # %bb.2: # %cond.false
542 ; X64-NEXT: bsfw %di, %ax
544 ; X64-NEXT: .LBB13_1:
545 ; X64-NEXT: movw $16, %ax
548 ; X32-CLZ-LABEL: cttz_i16_zero_test:
550 ; X32-CLZ-NEXT: tzcntw {{[0-9]+}}(%esp), %ax
553 ; X64-CLZ-LABEL: cttz_i16_zero_test:
555 ; X64-CLZ-NEXT: tzcntw %di, %ax
557 %tmp1 = call i16 @llvm.cttz.i16(i16 %n, i1 false)
561 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; cttz on an i32 argument with the i1 flag set to false.
; Without -mattr: testl + je guards bsfl on the non-zero path (%cond.false),
; and the je target loads the constant 32.
; With +bmi,+lzcnt: a single tzcntl and no branch.
562 define i32 @cttz_i32_zero_test(i32 %n) {
563 ; X32-LABEL: cttz_i32_zero_test:
565 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
566 ; X32-NEXT: testl %eax, %eax
567 ; X32-NEXT: je .LBB14_1
568 ; X32-NEXT: # %bb.2: # %cond.false
569 ; X32-NEXT: bsfl %eax, %eax
571 ; X32-NEXT: .LBB14_1:
572 ; X32-NEXT: movl $32, %eax
575 ; X64-LABEL: cttz_i32_zero_test:
577 ; X64-NEXT: testl %edi, %edi
578 ; X64-NEXT: je .LBB14_1
579 ; X64-NEXT: # %bb.2: # %cond.false
580 ; X64-NEXT: bsfl %edi, %eax
582 ; X64-NEXT: .LBB14_1:
583 ; X64-NEXT: movl $32, %eax
586 ; X32-CLZ-LABEL: cttz_i32_zero_test:
588 ; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
591 ; X64-CLZ-LABEL: cttz_i32_zero_test:
593 ; X64-CLZ-NEXT: tzcntl %edi, %eax
595 %tmp1 = call i32 @llvm.cttz.i32(i32 %n, i1 false)
599 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; cttz on an i64 argument with the i1 flag set to false.
; On x86_64 without -mattr: testq + je guards bsfq, and the je target loads
; the constant 64. With +bmi,+lzcnt: a single tzcntq.
; On i686 without -mattr: one word is scanned with bsfl while 32 is preloaded
; into %eax; the je after the movl skips the copy of the bsfl result and keeps
; the 32. If the other word is zero, 32 is added, so an all-zero input gives
; 32 + 32 = 64. If the other word is non-zero its own bsfl is the result.
; On i686 with +bmi,+lzcnt: test one word, tzcntl it, or tzcntl the other
; word and add 32.
600 define i64 @cttz_i64_zero_test(i64 %n) {
601 ; X32-LABEL: cttz_i64_zero_test:
603 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
604 ; X32-NEXT: bsfl {{[0-9]+}}(%esp), %edx
605 ; X32-NEXT: movl $32, %eax
606 ; X32-NEXT: je .LBB15_2
608 ; X32-NEXT: movl %edx, %eax
609 ; X32-NEXT: .LBB15_2:
610 ; X32-NEXT: testl %ecx, %ecx
611 ; X32-NEXT: jne .LBB15_3
613 ; X32-NEXT: addl $32, %eax
614 ; X32-NEXT: xorl %edx, %edx
616 ; X32-NEXT: .LBB15_3:
617 ; X32-NEXT: bsfl %ecx, %eax
618 ; X32-NEXT: xorl %edx, %edx
621 ; X64-LABEL: cttz_i64_zero_test:
623 ; X64-NEXT: testq %rdi, %rdi
624 ; X64-NEXT: je .LBB15_1
625 ; X64-NEXT: # %bb.2: # %cond.false
626 ; X64-NEXT: bsfq %rdi, %rax
628 ; X64-NEXT: .LBB15_1:
629 ; X64-NEXT: movl $64, %eax
632 ; X32-CLZ-LABEL: cttz_i64_zero_test:
634 ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
635 ; X32-CLZ-NEXT: testl %eax, %eax
636 ; X32-CLZ-NEXT: jne .LBB15_1
637 ; X32-CLZ-NEXT: # %bb.2:
638 ; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
639 ; X32-CLZ-NEXT: addl $32, %eax
640 ; X32-CLZ-NEXT: xorl %edx, %edx
642 ; X32-CLZ-NEXT: .LBB15_1:
643 ; X32-CLZ-NEXT: tzcntl %eax, %eax
644 ; X32-CLZ-NEXT: xorl %edx, %edx
647 ; X64-CLZ-LABEL: cttz_i64_zero_test:
649 ; X64-CLZ-NEXT: tzcntq %rdi, %rax
651 %tmp1 = call i64 @llvm.cttz.i64(i64 %n, i1 false)
655 ; Don't generate the cmovne when the source is known non-zero (and bsr would
658 ; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
659 ; codegen doesn't know how to delete the movl and je.
; ctlz (i1 flag false) on %or; the expected code shows the argument being
; combined with 1 (orl $1) before the count.
; Without -mattr the checks still contain the je and the movl $32 fallback
; that the FIXME above refers to; no cmov appears in any configuration.
; With +bmi,+lzcnt: orl $1 followed by a single lzcntl.
660 define i32 @ctlz_i32_fold_cmov(i32 %n) {
661 ; X32-LABEL: ctlz_i32_fold_cmov:
663 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
664 ; X32-NEXT: orl $1, %eax
665 ; X32-NEXT: je .LBB16_1
666 ; X32-NEXT: # %bb.2: # %cond.false
667 ; X32-NEXT: bsrl %eax, %eax
668 ; X32-NEXT: xorl $31, %eax
670 ; X32-NEXT: .LBB16_1:
671 ; X32-NEXT: movl $32, %eax
674 ; X64-LABEL: ctlz_i32_fold_cmov:
676 ; X64-NEXT: orl $1, %edi
677 ; X64-NEXT: je .LBB16_1
678 ; X64-NEXT: # %bb.2: # %cond.false
679 ; X64-NEXT: bsrl %edi, %eax
680 ; X64-NEXT: xorl $31, %eax
682 ; X64-NEXT: .LBB16_1:
683 ; X64-NEXT: movl $32, %eax
686 ; X32-CLZ-LABEL: ctlz_i32_fold_cmov:
688 ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
689 ; X32-CLZ-NEXT: orl $1, %eax
690 ; X32-CLZ-NEXT: lzcntl %eax, %eax
693 ; X64-CLZ-LABEL: ctlz_i32_fold_cmov:
695 ; X64-CLZ-NEXT: orl $1, %edi
696 ; X64-CLZ-NEXT: lzcntl %edi, %eax
699 %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
703 ; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
704 ; the most significant bit, which is what 'bsr' does natively.
705 ; FIXME: We should probably select BSR instead of LZCNT in these circumstances.
; The IR computes ctlz (i1 flag true) and xors the result with 31.
; Without -mattr the expected code is a lone bsrl with no xor.
; With +bmi,+lzcnt the checks still contain lzcntl followed by xorl $31,
; which is the case the FIXME above describes.
706 define i32 @ctlz_bsr(i32 %n) {
707 ; X32-LABEL: ctlz_bsr:
709 ; X32-NEXT: bsrl {{[0-9]+}}(%esp), %eax
712 ; X64-LABEL: ctlz_bsr:
714 ; X64-NEXT: bsrl %edi, %eax
717 ; X32-CLZ-LABEL: ctlz_bsr:
719 ; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
720 ; X32-CLZ-NEXT: xorl $31, %eax
723 ; X64-CLZ-LABEL: ctlz_bsr:
725 ; X64-CLZ-NEXT: lzcntl %edi, %eax
726 ; X64-CLZ-NEXT: xorl $31, %eax
728 %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
729 %bsr = xor i32 %ctlz, 31
733 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
734 ; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
735 ; codegen doesn't know how to combine the $32 and $31 into $63.
; The IR computes ctlz (i1 flag false) and xors the result with 31.
; Without -mattr the checks record the current, unoptimized output: the
; non-zero path has two back-to-back xorl $31 after bsrl, and the zero path
; has movl $32 followed by xorl $31 (the uncombined constants from the FIXME).
; With +bmi,+lzcnt: lzcntl followed by a single xorl $31, no branch.
736 define i32 @ctlz_bsr_zero_test(i32 %n) {
737 ; X32-LABEL: ctlz_bsr_zero_test:
739 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
740 ; X32-NEXT: testl %eax, %eax
741 ; X32-NEXT: je .LBB18_1
742 ; X32-NEXT: # %bb.2: # %cond.false
743 ; X32-NEXT: bsrl %eax, %eax
744 ; X32-NEXT: xorl $31, %eax
745 ; X32-NEXT: xorl $31, %eax
747 ; X32-NEXT: .LBB18_1:
748 ; X32-NEXT: movl $32, %eax
749 ; X32-NEXT: xorl $31, %eax
752 ; X64-LABEL: ctlz_bsr_zero_test:
754 ; X64-NEXT: testl %edi, %edi
755 ; X64-NEXT: je .LBB18_1
756 ; X64-NEXT: # %bb.2: # %cond.false
757 ; X64-NEXT: bsrl %edi, %eax
758 ; X64-NEXT: xorl $31, %eax
759 ; X64-NEXT: xorl $31, %eax
761 ; X64-NEXT: .LBB18_1:
762 ; X64-NEXT: movl $32, %eax
763 ; X64-NEXT: xorl $31, %eax
766 ; X32-CLZ-LABEL: ctlz_bsr_zero_test:
768 ; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
769 ; X32-CLZ-NEXT: xorl $31, %eax
772 ; X64-CLZ-LABEL: ctlz_bsr_zero_test:
774 ; X64-CLZ-NEXT: lzcntl %edi, %eax
775 ; X64-CLZ-NEXT: xorl $31, %eax
777 %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
778 %bsr = xor i32 %ctlz, 31
; cttz (i1 flag true) on %x2, with the result masked by 'and i8 %tmp, 1'.
; The expected code sets bit 1 of the input (orb $2) before the scan, which
; limits the count to 0 or 1, so the mask cannot change the value; none of
; the visible check lines contains an and instruction.
; The scan is bsfl without -mattr and tzcntl with +bmi,+lzcnt.
782 define i8 @cttz_i8_knownbits(i8 %x) {
783 ; X32-LABEL: cttz_i8_knownbits:
785 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
786 ; X32-NEXT: orb $2, %al
787 ; X32-NEXT: movzbl %al, %eax
788 ; X32-NEXT: bsfl %eax, %eax
789 ; X32-NEXT: # kill: def $al killed $al killed $eax
792 ; X64-LABEL: cttz_i8_knownbits:
794 ; X64-NEXT: orb $2, %dil
795 ; X64-NEXT: movzbl %dil, %eax
796 ; X64-NEXT: bsfl %eax, %eax
797 ; X64-NEXT: # kill: def $al killed $al killed $eax
800 ; X32-CLZ-LABEL: cttz_i8_knownbits:
802 ; X32-CLZ-NEXT: movb {{[0-9]+}}(%esp), %al
803 ; X32-CLZ-NEXT: orb $2, %al
804 ; X32-CLZ-NEXT: movzbl %al, %eax
805 ; X32-CLZ-NEXT: tzcntl %eax, %eax
806 ; X32-CLZ-NEXT: # kill: def $al killed $al killed $eax
809 ; X64-CLZ-LABEL: cttz_i8_knownbits:
811 ; X64-CLZ-NEXT: orb $2, %dil
812 ; X64-CLZ-NEXT: movzbl %dil, %eax
813 ; X64-CLZ-NEXT: tzcntl %eax, %eax
814 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
817 %tmp = call i8 @llvm.cttz.i8(i8 %x2, i1 true )
818 %tmp2 = and i8 %tmp, 1
; ctlz (i1 flag true) on %x2, with the result masked by 'and i8 %tmp, 1'.
; The expected code sets bit 6 of the input (orb $64) before the scan, which
; limits the i8 leading-zero count to 0 or 1, so the mask cannot change the
; value; none of the visible check lines contains an and instruction.
; The count is bsrl + xorl $7 without -mattr and lzcntl + addl $-24 with
; +bmi,+lzcnt.
822 define i8 @ctlz_i8_knownbits(i8 %x) {
823 ; X32-LABEL: ctlz_i8_knownbits:
825 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
826 ; X32-NEXT: orb $64, %al
827 ; X32-NEXT: movzbl %al, %eax
828 ; X32-NEXT: bsrl %eax, %eax
829 ; X32-NEXT: xorl $7, %eax
830 ; X32-NEXT: # kill: def $al killed $al killed $eax
833 ; X64-LABEL: ctlz_i8_knownbits:
835 ; X64-NEXT: orb $64, %dil
836 ; X64-NEXT: movzbl %dil, %eax
837 ; X64-NEXT: bsrl %eax, %eax
838 ; X64-NEXT: xorl $7, %eax
839 ; X64-NEXT: # kill: def $al killed $al killed $eax
842 ; X32-CLZ-LABEL: ctlz_i8_knownbits:
844 ; X32-CLZ-NEXT: movb {{[0-9]+}}(%esp), %al
845 ; X32-CLZ-NEXT: orb $64, %al
846 ; X32-CLZ-NEXT: movzbl %al, %eax
847 ; X32-CLZ-NEXT: lzcntl %eax, %eax
848 ; X32-CLZ-NEXT: addl $-24, %eax
849 ; X32-CLZ-NEXT: # kill: def $al killed $al killed $eax
852 ; X64-CLZ-LABEL: ctlz_i8_knownbits:
854 ; X64-CLZ-NEXT: orb $64, %dil
855 ; X64-CLZ-NEXT: movzbl %dil, %eax
856 ; X64-CLZ-NEXT: lzcntl %eax, %eax
857 ; X64-CLZ-NEXT: addl $-24, %eax
858 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
862 %tmp = call i8 @llvm.ctlz.i8(i8 %x2, i1 true )
863 %tmp2 = and i8 %tmp, 1
867 ; Make sure we can detect that the input is non-zero and avoid cmov after BSR
868 ; This is relevant for 32-bit mode without lzcnt
; ctlz (i1 flag false) on %o; the expected code shows the input combined
; with 1 (orl $1 on one 32-bit word on i686, orq $1 on x86_64).
; On i686 without -mattr: one word is tested; if non-zero it is counted with
; bsrl / xorl $31. Otherwise the other word is or-ed with 1, counted with
; bsrl / xorl $31, and bit 5 is set with orl $32. No preloaded fallback
; constant or cmov appears on this path.
; On x86_64 without -mattr the checks still contain a je and a movl $64
; fallback after the orq.
; With +bmi,+lzcnt: lzcntl per word on i686 (again using orl $32), and
; orq $1 + lzcntq on x86_64.
869 define i64 @ctlz_i64_zero_test_knownneverzero(i64 %n) {
870 ; X32-LABEL: ctlz_i64_zero_test_knownneverzero:
872 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
873 ; X32-NEXT: testl %eax, %eax
874 ; X32-NEXT: jne .LBB21_1
876 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
877 ; X32-NEXT: orl $1, %eax
878 ; X32-NEXT: bsrl %eax, %eax
879 ; X32-NEXT: xorl $31, %eax
880 ; X32-NEXT: orl $32, %eax
881 ; X32-NEXT: xorl %edx, %edx
883 ; X32-NEXT: .LBB21_1:
884 ; X32-NEXT: bsrl %eax, %eax
885 ; X32-NEXT: xorl $31, %eax
886 ; X32-NEXT: xorl %edx, %edx
889 ; X64-LABEL: ctlz_i64_zero_test_knownneverzero:
891 ; X64-NEXT: orq $1, %rdi
892 ; X64-NEXT: je .LBB21_1
893 ; X64-NEXT: # %bb.2: # %cond.false
894 ; X64-NEXT: bsrq %rdi, %rax
895 ; X64-NEXT: xorq $63, %rax
897 ; X64-NEXT: .LBB21_1:
898 ; X64-NEXT: movl $64, %eax
901 ; X32-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
903 ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
904 ; X32-CLZ-NEXT: testl %eax, %eax
905 ; X32-CLZ-NEXT: jne .LBB21_1
906 ; X32-CLZ-NEXT: # %bb.2:
907 ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
908 ; X32-CLZ-NEXT: orl $1, %eax
909 ; X32-CLZ-NEXT: lzcntl %eax, %eax
910 ; X32-CLZ-NEXT: orl $32, %eax
911 ; X32-CLZ-NEXT: xorl %edx, %edx
913 ; X32-CLZ-NEXT: .LBB21_1:
914 ; X32-CLZ-NEXT: lzcntl %eax, %eax
915 ; X32-CLZ-NEXT: xorl %edx, %edx
918 ; X64-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
920 ; X64-CLZ-NEXT: orq $1, %rdi
921 ; X64-CLZ-NEXT: lzcntq %rdi, %rax
924 %tmp1 = call i64 @llvm.ctlz.i64(i64 %o, i1 false)
928 ; Make sure we can detect that the input is non-zero and avoid cmov after BSF
929 ; This is relevant for 32-bit mode without tzcnt
; cttz (i1 flag false) on %o, where %o is %n or-ed with
; 0x8000000000000000 (bit 63 forced on).
; On i686 without -mattr: one word is tested; if non-zero it is scanned with
; bsfl. Otherwise the other word is or-ed with 0x80000000, scanned with bsfl,
; and bit 5 is set with orl $32. No preloaded fallback constant or cmov
; appears on this path.
; On x86_64 without -mattr the checks still contain a je and a movl $64
; fallback after the movabsq / orq.
; With +bmi,+lzcnt: tzcntl per word on i686 (again using orl $32), and
; movabsq / orq / tzcntq on x86_64.
930 define i64 @cttz_i64_zero_test_knownneverzero(i64 %n) {
931 ; X32-LABEL: cttz_i64_zero_test_knownneverzero:
933 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
934 ; X32-NEXT: testl %eax, %eax
935 ; X32-NEXT: jne .LBB22_1
937 ; X32-NEXT: movl $-2147483648, %eax # imm = 0x80000000
938 ; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
939 ; X32-NEXT: bsfl %eax, %eax
940 ; X32-NEXT: orl $32, %eax
941 ; X32-NEXT: xorl %edx, %edx
943 ; X32-NEXT: .LBB22_1:
944 ; X32-NEXT: bsfl %eax, %eax
945 ; X32-NEXT: xorl %edx, %edx
948 ; X64-LABEL: cttz_i64_zero_test_knownneverzero:
950 ; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
951 ; X64-NEXT: orq %rdi, %rax
952 ; X64-NEXT: je .LBB22_1
953 ; X64-NEXT: # %bb.2: # %cond.false
954 ; X64-NEXT: bsfq %rax, %rax
956 ; X64-NEXT: .LBB22_1:
957 ; X64-NEXT: movl $64, %eax
960 ; X32-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
962 ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
963 ; X32-CLZ-NEXT: testl %eax, %eax
964 ; X32-CLZ-NEXT: jne .LBB22_1
965 ; X32-CLZ-NEXT: # %bb.2:
966 ; X32-CLZ-NEXT: movl $-2147483648, %eax # imm = 0x80000000
967 ; X32-CLZ-NEXT: orl {{[0-9]+}}(%esp), %eax
968 ; X32-CLZ-NEXT: tzcntl %eax, %eax
969 ; X32-CLZ-NEXT: orl $32, %eax
970 ; X32-CLZ-NEXT: xorl %edx, %edx
972 ; X32-CLZ-NEXT: .LBB22_1:
973 ; X32-CLZ-NEXT: tzcntl %eax, %eax
974 ; X32-CLZ-NEXT: xorl %edx, %edx
977 ; X64-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
979 ; X64-CLZ-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
980 ; X64-CLZ-NEXT: orq %rdi, %rax
981 ; X64-CLZ-NEXT: tzcntq %rax, %rax
983 %o = or i64 %n, -9223372036854775808 ; 0x8000000000000000
984 %tmp1 = call i64 @llvm.cttz.i64(i64 %o, i1 false)