1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86,X86-NOCMOV
3 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov | FileCheck %s --check-prefixes=X86,X86-CMOV
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
5 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=X86-CLZ
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=X64-CLZ
8 declare i8 @llvm.cttz.i8(i8, i1)
9 declare i16 @llvm.cttz.i16(i16, i1)
10 declare i32 @llvm.cttz.i32(i32, i1)
11 declare i64 @llvm.cttz.i64(i64, i1)
13 declare i8 @llvm.ctlz.i8(i8, i1)
14 declare i16 @llvm.ctlz.i16(i16, i1)
15 declare i32 @llvm.ctlz.i32(i32, i1)
16 declare i64 @llvm.ctlz.i64(i64, i1)
; cttz.i8 called with its i1 flag set to true. Every prefix expects a single
; 32-bit bsfl/tzcntl with no zero-input guard, and the i8 result is read
; from %al.
18 define i8 @cttz_i8(i8 %x) {
21 ; X86-NEXT: bsfl {{[0-9]+}}(%esp), %eax
22 ; X86-NEXT: # kill: def $al killed $al killed $eax
27 ; X64-NEXT: bsfl %edi, %eax
28 ; X64-NEXT: # kill: def $al killed $al killed $eax
31 ; X86-CLZ-LABEL: cttz_i8:
33 ; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
34 ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
37 ; X64-CLZ-LABEL: cttz_i8:
39 ; X64-CLZ-NEXT: tzcntl %edi, %eax
40 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
42 %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
; cttz.i16 called with its i1 flag set to true. The X86/X64 prefixes expect a
; 16-bit bsfw; the +bmi prefixes expect a 32-bit tzcntl whose low half (%ax)
; is the result.
46 define i16 @cttz_i16(i16 %x) {
47 ; X86-LABEL: cttz_i16:
49 ; X86-NEXT: bsfw {{[0-9]+}}(%esp), %ax
52 ; X64-LABEL: cttz_i16:
54 ; X64-NEXT: bsfw %di, %ax
57 ; X86-CLZ-LABEL: cttz_i16:
59 ; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
60 ; X86-CLZ-NEXT: # kill: def $ax killed $ax killed $eax
63 ; X64-CLZ-LABEL: cttz_i16:
65 ; X64-CLZ-NEXT: tzcntl %edi, %eax
66 ; X64-CLZ-NEXT: # kill: def $ax killed $ax killed $eax
68 %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
; cttz.i32 called with its i1 flag set to true. Each prefix expects exactly
; one instruction for the count: bsfl, or tzcntl on the +bmi runs.
72 define i32 @cttz_i32(i32 %x) {
73 ; X86-LABEL: cttz_i32:
75 ; X86-NEXT: bsfl {{[0-9]+}}(%esp), %eax
78 ; X64-LABEL: cttz_i32:
80 ; X64-NEXT: bsfl %edi, %eax
83 ; X86-CLZ-LABEL: cttz_i32:
85 ; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
88 ; X64-CLZ-LABEL: cttz_i32:
90 ; X64-CLZ-NEXT: tzcntl %edi, %eax
92 %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
; cttz.i64 called with its i1 flag set to true. On i686 the checks treat the
; i64 as two 32-bit pieces: one piece is tested for zero, and the count taken
; from the other piece has 32 added to it. X86-NOCMOV and X86-CLZ expect a
; branch for that selection, X86-CMOV expects a cmovnel. The x86-64 prefixes
; expect a single bsfq/tzcntq.
96 define i64 @cttz_i64(i64 %x) {
97 ; X86-NOCMOV-LABEL: cttz_i64:
98 ; X86-NOCMOV: # %bb.0:
99 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
100 ; X86-NOCMOV-NEXT: testl %eax, %eax
101 ; X86-NOCMOV-NEXT: jne .LBB3_1
102 ; X86-NOCMOV-NEXT: # %bb.2:
103 ; X86-NOCMOV-NEXT: bsfl {{[0-9]+}}(%esp), %eax
104 ; X86-NOCMOV-NEXT: addl $32, %eax
105 ; X86-NOCMOV-NEXT: xorl %edx, %edx
106 ; X86-NOCMOV-NEXT: retl
107 ; X86-NOCMOV-NEXT: .LBB3_1:
108 ; X86-NOCMOV-NEXT: bsfl %eax, %eax
109 ; X86-NOCMOV-NEXT: xorl %edx, %edx
110 ; X86-NOCMOV-NEXT: retl
112 ; X86-CMOV-LABEL: cttz_i64:
114 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
115 ; X86-CMOV-NEXT: bsfl %ecx, %edx
116 ; X86-CMOV-NEXT: bsfl {{[0-9]+}}(%esp), %eax
117 ; X86-CMOV-NEXT: addl $32, %eax
118 ; X86-CMOV-NEXT: testl %ecx, %ecx
119 ; X86-CMOV-NEXT: cmovnel %edx, %eax
120 ; X86-CMOV-NEXT: xorl %edx, %edx
121 ; X86-CMOV-NEXT: retl
123 ; X64-LABEL: cttz_i64:
125 ; X64-NEXT: bsfq %rdi, %rax
128 ; X86-CLZ-LABEL: cttz_i64:
130 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
131 ; X86-CLZ-NEXT: testl %eax, %eax
132 ; X86-CLZ-NEXT: jne .LBB3_1
133 ; X86-CLZ-NEXT: # %bb.2:
134 ; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
135 ; X86-CLZ-NEXT: addl $32, %eax
136 ; X86-CLZ-NEXT: xorl %edx, %edx
138 ; X86-CLZ-NEXT: .LBB3_1:
139 ; X86-CLZ-NEXT: tzcntl %eax, %eax
140 ; X86-CLZ-NEXT: xorl %edx, %edx
143 ; X64-CLZ-LABEL: cttz_i64:
145 ; X64-CLZ-NEXT: tzcntq %rdi, %rax
147 %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
; ctlz.i8 called with its i1 flag set to true. All prefixes expect the input
; to be zero-extended with movzbl and counted at 32 bits: bsrl + xorl $7
; without lzcnt, lzcntl + addl $-24 on the +lzcnt runs.
151 define i8 @ctlz_i8(i8 %x) {
152 ; X86-LABEL: ctlz_i8:
154 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
155 ; X86-NEXT: bsrl %eax, %eax
156 ; X86-NEXT: xorl $7, %eax
157 ; X86-NEXT: # kill: def $al killed $al killed $eax
160 ; X64-LABEL: ctlz_i8:
162 ; X64-NEXT: movzbl %dil, %eax
163 ; X64-NEXT: bsrl %eax, %eax
164 ; X64-NEXT: xorl $7, %eax
165 ; X64-NEXT: # kill: def $al killed $al killed $eax
168 ; X86-CLZ-LABEL: ctlz_i8:
170 ; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
171 ; X86-CLZ-NEXT: lzcntl %eax, %eax
172 ; X86-CLZ-NEXT: addl $-24, %eax
173 ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
176 ; X64-CLZ-LABEL: ctlz_i8:
178 ; X64-CLZ-NEXT: movzbl %dil, %eax
179 ; X64-CLZ-NEXT: lzcntl %eax, %eax
180 ; X64-CLZ-NEXT: addl $-24, %eax
181 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
183 %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
; ctlz.i16 called with its i1 flag set to true. Without lzcnt the checks
; expect bsrw followed by xorl $15; with +lzcnt a single lzcntw is expected.
187 define i16 @ctlz_i16(i16 %x) {
188 ; X86-LABEL: ctlz_i16:
190 ; X86-NEXT: bsrw {{[0-9]+}}(%esp), %ax
191 ; X86-NEXT: xorl $15, %eax
192 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
195 ; X64-LABEL: ctlz_i16:
197 ; X64-NEXT: bsrw %di, %ax
198 ; X64-NEXT: xorl $15, %eax
199 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
202 ; X86-CLZ-LABEL: ctlz_i16:
204 ; X86-CLZ-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
207 ; X64-CLZ-LABEL: ctlz_i16:
209 ; X64-CLZ-NEXT: lzcntw %di, %ax
211 %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
; ctlz.i32 called with its i1 flag set to true. Without lzcnt the checks
; expect bsrl followed by xorl $31; with +lzcnt a single lzcntl is expected.
215 define i32 @ctlz_i32(i32 %x) {
216 ; X86-LABEL: ctlz_i32:
218 ; X86-NEXT: bsrl {{[0-9]+}}(%esp), %eax
219 ; X86-NEXT: xorl $31, %eax
222 ; X64-LABEL: ctlz_i32:
224 ; X64-NEXT: bsrl %edi, %eax
225 ; X64-NEXT: xorl $31, %eax
228 ; X86-CLZ-LABEL: ctlz_i32:
230 ; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
233 ; X64-CLZ-LABEL: ctlz_i32:
235 ; X64-CLZ-NEXT: lzcntl %edi, %eax
237 %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
; ctlz.i64 called with its i1 flag set to true. On i686 the checks treat the
; i64 as two 32-bit pieces: one piece is tested for zero, and the count taken
; from the other piece has 32 added to it. X86-NOCMOV and X86-CLZ expect a
; branch for that selection, X86-CMOV expects a cmovnel. The x86-64 prefixes
; expect bsrq + xorq $63, or a single lzcntq with +lzcnt.
241 define i64 @ctlz_i64(i64 %x) {
242 ; X86-NOCMOV-LABEL: ctlz_i64:
243 ; X86-NOCMOV: # %bb.0:
244 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
245 ; X86-NOCMOV-NEXT: testl %eax, %eax
246 ; X86-NOCMOV-NEXT: jne .LBB7_1
247 ; X86-NOCMOV-NEXT: # %bb.2:
248 ; X86-NOCMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
249 ; X86-NOCMOV-NEXT: xorl $31, %eax
250 ; X86-NOCMOV-NEXT: addl $32, %eax
251 ; X86-NOCMOV-NEXT: xorl %edx, %edx
252 ; X86-NOCMOV-NEXT: retl
253 ; X86-NOCMOV-NEXT: .LBB7_1:
254 ; X86-NOCMOV-NEXT: bsrl %eax, %eax
255 ; X86-NOCMOV-NEXT: xorl $31, %eax
256 ; X86-NOCMOV-NEXT: xorl %edx, %edx
257 ; X86-NOCMOV-NEXT: retl
259 ; X86-CMOV-LABEL: ctlz_i64:
261 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
262 ; X86-CMOV-NEXT: bsrl %ecx, %edx
263 ; X86-CMOV-NEXT: xorl $31, %edx
264 ; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
265 ; X86-CMOV-NEXT: xorl $31, %eax
266 ; X86-CMOV-NEXT: addl $32, %eax
267 ; X86-CMOV-NEXT: testl %ecx, %ecx
268 ; X86-CMOV-NEXT: cmovnel %edx, %eax
269 ; X86-CMOV-NEXT: xorl %edx, %edx
270 ; X86-CMOV-NEXT: retl
272 ; X64-LABEL: ctlz_i64:
274 ; X64-NEXT: bsrq %rdi, %rax
275 ; X64-NEXT: xorq $63, %rax
278 ; X86-CLZ-LABEL: ctlz_i64:
280 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
281 ; X86-CLZ-NEXT: testl %eax, %eax
282 ; X86-CLZ-NEXT: jne .LBB7_1
283 ; X86-CLZ-NEXT: # %bb.2:
284 ; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
285 ; X86-CLZ-NEXT: addl $32, %eax
286 ; X86-CLZ-NEXT: xorl %edx, %edx
288 ; X86-CLZ-NEXT: .LBB7_1:
289 ; X86-CLZ-NEXT: lzcntl %eax, %eax
290 ; X86-CLZ-NEXT: xorl %edx, %edx
293 ; X64-CLZ-LABEL: ctlz_i64:
295 ; X64-CLZ-NEXT: lzcntq %rdi, %rax
297 %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 false )
301 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; Here the i1 flag is false. The X86/X64 checks expect a testb + je guard
; with movb $8 as the alternative result; the +lzcnt checks expect the plain
; movzbl/lzcntl/addl $-24 sequence with no branch.
302 define i8 @ctlz_i8_zero_test(i8 %n) {
303 ; X86-LABEL: ctlz_i8_zero_test:
305 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
306 ; X86-NEXT: testb %al, %al
307 ; X86-NEXT: je .LBB8_1
308 ; X86-NEXT: # %bb.2: # %cond.false
309 ; X86-NEXT: movzbl %al, %eax
310 ; X86-NEXT: bsrl %eax, %eax
311 ; X86-NEXT: xorl $7, %eax
312 ; X86-NEXT: # kill: def $al killed $al killed $eax
315 ; X86-NEXT: movb $8, %al
316 ; X86-NEXT: # kill: def $al killed $al killed $eax
319 ; X64-LABEL: ctlz_i8_zero_test:
321 ; X64-NEXT: testb %dil, %dil
322 ; X64-NEXT: je .LBB8_1
323 ; X64-NEXT: # %bb.2: # %cond.false
324 ; X64-NEXT: movzbl %dil, %eax
325 ; X64-NEXT: bsrl %eax, %eax
326 ; X64-NEXT: xorl $7, %eax
327 ; X64-NEXT: # kill: def $al killed $al killed $eax
330 ; X64-NEXT: movb $8, %al
331 ; X64-NEXT: # kill: def $al killed $al killed $eax
334 ; X86-CLZ-LABEL: ctlz_i8_zero_test:
336 ; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
337 ; X86-CLZ-NEXT: lzcntl %eax, %eax
338 ; X86-CLZ-NEXT: addl $-24, %eax
339 ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
342 ; X64-CLZ-LABEL: ctlz_i8_zero_test:
344 ; X64-CLZ-NEXT: movzbl %dil, %eax
345 ; X64-CLZ-NEXT: lzcntl %eax, %eax
346 ; X64-CLZ-NEXT: addl $-24, %eax
347 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
349 %tmp1 = call i8 @llvm.ctlz.i8(i8 %n, i1 false)
353 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; Here the i1 flag is false. The X86/X64 checks expect a testw + je guard
; with movw $16 as the alternative result; the +lzcnt checks expect a single
; lzcntw with no branch.
354 define i16 @ctlz_i16_zero_test(i16 %n) {
355 ; X86-LABEL: ctlz_i16_zero_test:
357 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
358 ; X86-NEXT: testw %ax, %ax
359 ; X86-NEXT: je .LBB9_1
360 ; X86-NEXT: # %bb.2: # %cond.false
361 ; X86-NEXT: bsrw %ax, %ax
362 ; X86-NEXT: xorl $15, %eax
363 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
366 ; X86-NEXT: movw $16, %ax
367 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
370 ; X64-LABEL: ctlz_i16_zero_test:
372 ; X64-NEXT: testw %di, %di
373 ; X64-NEXT: je .LBB9_1
374 ; X64-NEXT: # %bb.2: # %cond.false
375 ; X64-NEXT: bsrw %di, %ax
376 ; X64-NEXT: xorl $15, %eax
377 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
380 ; X64-NEXT: movw $16, %ax
381 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
384 ; X86-CLZ-LABEL: ctlz_i16_zero_test:
386 ; X86-CLZ-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
389 ; X64-CLZ-LABEL: ctlz_i16_zero_test:
391 ; X64-CLZ-NEXT: lzcntw %di, %ax
393 %tmp1 = call i16 @llvm.ctlz.i16(i16 %n, i1 false)
397 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; Here the i1 flag is false. The X86/X64 checks expect a testl + je guard
; with movl $32 in the .LBB10_1 block; the +lzcnt checks expect a single
; lzcntl with no branch.
398 define i32 @ctlz_i32_zero_test(i32 %n) {
399 ; X86-LABEL: ctlz_i32_zero_test:
401 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
402 ; X86-NEXT: testl %eax, %eax
403 ; X86-NEXT: je .LBB10_1
404 ; X86-NEXT: # %bb.2: # %cond.false
405 ; X86-NEXT: bsrl %eax, %eax
406 ; X86-NEXT: xorl $31, %eax
408 ; X86-NEXT: .LBB10_1:
409 ; X86-NEXT: movl $32, %eax
412 ; X64-LABEL: ctlz_i32_zero_test:
414 ; X64-NEXT: testl %edi, %edi
415 ; X64-NEXT: je .LBB10_1
416 ; X64-NEXT: # %bb.2: # %cond.false
417 ; X64-NEXT: bsrl %edi, %eax
418 ; X64-NEXT: xorl $31, %eax
420 ; X64-NEXT: .LBB10_1:
421 ; X64-NEXT: movl $32, %eax
424 ; X86-CLZ-LABEL: ctlz_i32_zero_test:
426 ; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
429 ; X64-CLZ-LABEL: ctlz_i32_zero_test:
431 ; X64-CLZ-NEXT: lzcntl %edi, %eax
433 %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
437 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; Here the i1 flag is false. The test-and-branch shape described above is
; what the X64 checks expect (testq + je, movl $64 in .LBB11_1). The i686
; non-lzcnt checks instead key off the flags left by bsrl: $63 replaces the
; bsrl result via je (NOCMOV) or cmovnel (CMOV), and the following
; xorl $31 + addl $32 turns that 63 into 64. The +lzcnt checks expect no
; special value for zero.
438 define i64 @ctlz_i64_zero_test(i64 %n) {
439 ; X86-NOCMOV-LABEL: ctlz_i64_zero_test:
440 ; X86-NOCMOV: # %bb.0:
441 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
442 ; X86-NOCMOV-NEXT: bsrl {{[0-9]+}}(%esp), %edx
443 ; X86-NOCMOV-NEXT: movl $63, %eax
444 ; X86-NOCMOV-NEXT: je .LBB11_2
445 ; X86-NOCMOV-NEXT: # %bb.1:
446 ; X86-NOCMOV-NEXT: movl %edx, %eax
447 ; X86-NOCMOV-NEXT: .LBB11_2:
448 ; X86-NOCMOV-NEXT: testl %ecx, %ecx
449 ; X86-NOCMOV-NEXT: jne .LBB11_3
450 ; X86-NOCMOV-NEXT: # %bb.4:
451 ; X86-NOCMOV-NEXT: xorl $31, %eax
452 ; X86-NOCMOV-NEXT: addl $32, %eax
453 ; X86-NOCMOV-NEXT: xorl %edx, %edx
454 ; X86-NOCMOV-NEXT: retl
455 ; X86-NOCMOV-NEXT: .LBB11_3:
456 ; X86-NOCMOV-NEXT: bsrl %ecx, %eax
457 ; X86-NOCMOV-NEXT: xorl $31, %eax
458 ; X86-NOCMOV-NEXT: xorl %edx, %edx
459 ; X86-NOCMOV-NEXT: retl
461 ; X86-CMOV-LABEL: ctlz_i64_zero_test:
463 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
464 ; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
465 ; X86-CMOV-NEXT: movl $63, %edx
466 ; X86-CMOV-NEXT: cmovnel %eax, %edx
467 ; X86-CMOV-NEXT: xorl $31, %edx
468 ; X86-CMOV-NEXT: addl $32, %edx
469 ; X86-CMOV-NEXT: bsrl %ecx, %eax
470 ; X86-CMOV-NEXT: xorl $31, %eax
471 ; X86-CMOV-NEXT: testl %ecx, %ecx
472 ; X86-CMOV-NEXT: cmovel %edx, %eax
473 ; X86-CMOV-NEXT: xorl %edx, %edx
474 ; X86-CMOV-NEXT: retl
476 ; X64-LABEL: ctlz_i64_zero_test:
478 ; X64-NEXT: testq %rdi, %rdi
479 ; X64-NEXT: je .LBB11_1
480 ; X64-NEXT: # %bb.2: # %cond.false
481 ; X64-NEXT: bsrq %rdi, %rax
482 ; X64-NEXT: xorq $63, %rax
484 ; X64-NEXT: .LBB11_1:
485 ; X64-NEXT: movl $64, %eax
488 ; X86-CLZ-LABEL: ctlz_i64_zero_test:
490 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
491 ; X86-CLZ-NEXT: testl %eax, %eax
492 ; X86-CLZ-NEXT: jne .LBB11_1
493 ; X86-CLZ-NEXT: # %bb.2:
494 ; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
495 ; X86-CLZ-NEXT: addl $32, %eax
496 ; X86-CLZ-NEXT: xorl %edx, %edx
498 ; X86-CLZ-NEXT: .LBB11_1:
499 ; X86-CLZ-NEXT: lzcntl %eax, %eax
500 ; X86-CLZ-NEXT: xorl %edx, %edx
503 ; X64-CLZ-LABEL: ctlz_i64_zero_test:
505 ; X64-CLZ-NEXT: lzcntq %rdi, %rax
507 %tmp1 = call i64 @llvm.ctlz.i64(i64 %n, i1 false)
511 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; Here the i1 flag is false. The X86/X64 checks expect a testb + je guard
; with movb $8 in the .LBB12_1 block. The +bmi checks expect no branch:
; bit 8 is OR'ed in ($256) before the 32-bit tzcntl, so a zero i8 input
; yields 8.
512 define i8 @cttz_i8_zero_test(i8 %n) {
513 ; X86-LABEL: cttz_i8_zero_test:
515 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
516 ; X86-NEXT: testb %al, %al
517 ; X86-NEXT: je .LBB12_1
518 ; X86-NEXT: # %bb.2: # %cond.false
519 ; X86-NEXT: movzbl %al, %eax
520 ; X86-NEXT: bsfl %eax, %eax
521 ; X86-NEXT: # kill: def $al killed $al killed $eax
523 ; X86-NEXT: .LBB12_1:
524 ; X86-NEXT: movb $8, %al
525 ; X86-NEXT: # kill: def $al killed $al killed $eax
528 ; X64-LABEL: cttz_i8_zero_test:
530 ; X64-NEXT: testb %dil, %dil
531 ; X64-NEXT: je .LBB12_1
532 ; X64-NEXT: # %bb.2: # %cond.false
533 ; X64-NEXT: movzbl %dil, %eax
534 ; X64-NEXT: bsfl %eax, %eax
535 ; X64-NEXT: # kill: def $al killed $al killed $eax
537 ; X64-NEXT: .LBB12_1:
538 ; X64-NEXT: movb $8, %al
539 ; X64-NEXT: # kill: def $al killed $al killed $eax
542 ; X86-CLZ-LABEL: cttz_i8_zero_test:
544 ; X86-CLZ-NEXT: movl $256, %eax # imm = 0x100
545 ; X86-CLZ-NEXT: orl {{[0-9]+}}(%esp), %eax
546 ; X86-CLZ-NEXT: tzcntl %eax, %eax
547 ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
550 ; X64-CLZ-LABEL: cttz_i8_zero_test:
552 ; X64-CLZ-NEXT: orl $256, %edi # imm = 0x100
553 ; X64-CLZ-NEXT: tzcntl %edi, %eax
554 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
556 %tmp1 = call i8 @llvm.cttz.i8(i8 %n, i1 false)
560 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; Here the i1 flag is false. The X86/X64 checks expect a testw + je guard
; with movw $16 in the .LBB13_1 block; the +bmi checks expect a single
; tzcntw with no branch.
561 define i16 @cttz_i16_zero_test(i16 %n) {
562 ; X86-LABEL: cttz_i16_zero_test:
564 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
565 ; X86-NEXT: testw %ax, %ax
566 ; X86-NEXT: je .LBB13_1
567 ; X86-NEXT: # %bb.2: # %cond.false
568 ; X86-NEXT: bsfw %ax, %ax
570 ; X86-NEXT: .LBB13_1:
571 ; X86-NEXT: movw $16, %ax
574 ; X64-LABEL: cttz_i16_zero_test:
576 ; X64-NEXT: testw %di, %di
577 ; X64-NEXT: je .LBB13_1
578 ; X64-NEXT: # %bb.2: # %cond.false
579 ; X64-NEXT: bsfw %di, %ax
581 ; X64-NEXT: .LBB13_1:
582 ; X64-NEXT: movw $16, %ax
585 ; X86-CLZ-LABEL: cttz_i16_zero_test:
587 ; X86-CLZ-NEXT: tzcntw {{[0-9]+}}(%esp), %ax
590 ; X64-CLZ-LABEL: cttz_i16_zero_test:
592 ; X64-CLZ-NEXT: tzcntw %di, %ax
594 %tmp1 = call i16 @llvm.cttz.i16(i16 %n, i1 false)
598 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; Here the i1 flag is false. The X86/X64 checks expect a testl + je guard
; with movl $32 in the .LBB14_1 block; the +bmi checks expect a single
; tzcntl with no branch.
599 define i32 @cttz_i32_zero_test(i32 %n) {
600 ; X86-LABEL: cttz_i32_zero_test:
602 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
603 ; X86-NEXT: testl %eax, %eax
604 ; X86-NEXT: je .LBB14_1
605 ; X86-NEXT: # %bb.2: # %cond.false
606 ; X86-NEXT: bsfl %eax, %eax
608 ; X86-NEXT: .LBB14_1:
609 ; X86-NEXT: movl $32, %eax
612 ; X64-LABEL: cttz_i32_zero_test:
614 ; X64-NEXT: testl %edi, %edi
615 ; X64-NEXT: je .LBB14_1
616 ; X64-NEXT: # %bb.2: # %cond.false
617 ; X64-NEXT: bsfl %edi, %eax
619 ; X64-NEXT: .LBB14_1:
620 ; X64-NEXT: movl $32, %eax
623 ; X86-CLZ-LABEL: cttz_i32_zero_test:
625 ; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
628 ; X64-CLZ-LABEL: cttz_i32_zero_test:
630 ; X64-CLZ-NEXT: tzcntl %edi, %eax
632 %tmp1 = call i32 @llvm.cttz.i32(i32 %n, i1 false)
636 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; Here the i1 flag is false. The test-and-branch shape described above is
; what the X64 checks expect (testq + je, movl $64 in .LBB15_1). The i686
; non-bmi checks instead key off the flags left by bsfl: $32 replaces the
; bsfl result via je (NOCMOV) or cmovnel (CMOV), and the following
; addl $32 turns that into 64. The +bmi checks expect no special value for
; zero.
637 define i64 @cttz_i64_zero_test(i64 %n) {
638 ; X86-NOCMOV-LABEL: cttz_i64_zero_test:
639 ; X86-NOCMOV: # %bb.0:
640 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
641 ; X86-NOCMOV-NEXT: bsfl {{[0-9]+}}(%esp), %edx
642 ; X86-NOCMOV-NEXT: movl $32, %eax
643 ; X86-NOCMOV-NEXT: je .LBB15_2
644 ; X86-NOCMOV-NEXT: # %bb.1:
645 ; X86-NOCMOV-NEXT: movl %edx, %eax
646 ; X86-NOCMOV-NEXT: .LBB15_2:
647 ; X86-NOCMOV-NEXT: testl %ecx, %ecx
648 ; X86-NOCMOV-NEXT: jne .LBB15_3
649 ; X86-NOCMOV-NEXT: # %bb.4:
650 ; X86-NOCMOV-NEXT: addl $32, %eax
651 ; X86-NOCMOV-NEXT: xorl %edx, %edx
652 ; X86-NOCMOV-NEXT: retl
653 ; X86-NOCMOV-NEXT: .LBB15_3:
654 ; X86-NOCMOV-NEXT: bsfl %ecx, %eax
655 ; X86-NOCMOV-NEXT: xorl %edx, %edx
656 ; X86-NOCMOV-NEXT: retl
658 ; X86-CMOV-LABEL: cttz_i64_zero_test:
660 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
661 ; X86-CMOV-NEXT: bsfl {{[0-9]+}}(%esp), %ecx
662 ; X86-CMOV-NEXT: movl $32, %edx
663 ; X86-CMOV-NEXT: cmovnel %ecx, %edx
664 ; X86-CMOV-NEXT: addl $32, %edx
665 ; X86-CMOV-NEXT: bsfl %eax, %eax
666 ; X86-CMOV-NEXT: cmovel %edx, %eax
667 ; X86-CMOV-NEXT: xorl %edx, %edx
668 ; X86-CMOV-NEXT: retl
670 ; X64-LABEL: cttz_i64_zero_test:
672 ; X64-NEXT: testq %rdi, %rdi
673 ; X64-NEXT: je .LBB15_1
674 ; X64-NEXT: # %bb.2: # %cond.false
675 ; X64-NEXT: bsfq %rdi, %rax
677 ; X64-NEXT: .LBB15_1:
678 ; X64-NEXT: movl $64, %eax
681 ; X86-CLZ-LABEL: cttz_i64_zero_test:
683 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
684 ; X86-CLZ-NEXT: testl %eax, %eax
685 ; X86-CLZ-NEXT: jne .LBB15_1
686 ; X86-CLZ-NEXT: # %bb.2:
687 ; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
688 ; X86-CLZ-NEXT: addl $32, %eax
689 ; X86-CLZ-NEXT: xorl %edx, %edx
691 ; X86-CLZ-NEXT: .LBB15_1:
692 ; X86-CLZ-NEXT: tzcntl %eax, %eax
693 ; X86-CLZ-NEXT: xorl %edx, %edx
696 ; X64-CLZ-LABEL: cttz_i64_zero_test:
698 ; X64-CLZ-NEXT: tzcntq %rdi, %rax
700 %tmp1 = call i64 @llvm.cttz.i64(i64 %n, i1 false)
704 ; Don't generate the cmovne when the source is known non-zero (and bsr would
; NOTE(review): the sentence above is cut off in this view; it presumably
; continues "not set ZF)" - confirm against the upstream test.
; The checks show the input OR'ed with 1 (orl $1) before the count, and
; neither a cmov nor a zero test around bsrl/lzcntl, even though the i1 flag
; on the ctlz call is false.
707 define i32 @ctlz_i32_fold_cmov(i32 %n) {
708 ; X86-LABEL: ctlz_i32_fold_cmov:
710 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
711 ; X86-NEXT: orl $1, %eax
712 ; X86-NEXT: bsrl %eax, %eax
713 ; X86-NEXT: xorl $31, %eax
716 ; X64-LABEL: ctlz_i32_fold_cmov:
718 ; X64-NEXT: orl $1, %edi
719 ; X64-NEXT: bsrl %edi, %eax
720 ; X64-NEXT: xorl $31, %eax
723 ; X86-CLZ-LABEL: ctlz_i32_fold_cmov:
725 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
726 ; X86-CLZ-NEXT: orl $1, %eax
727 ; X86-CLZ-NEXT: lzcntl %eax, %eax
730 ; X64-CLZ-LABEL: ctlz_i32_fold_cmov:
732 ; X64-CLZ-NEXT: orl $1, %edi
733 ; X64-CLZ-NEXT: lzcntl %edi, %eax
736 %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
740 ; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
741 ; the most significant bit, which is what 'bsr' does natively.
742 ; FIXME: We should probably select BSR instead of LZCNT in these circumstances.
; The IR computes ctlz(%n) xor 31. The X86/X64 checks expect a lone bsrl;
; the +lzcnt checks currently expect lzcntl followed by xorl $31, which is
; the case the FIXME refers to.
743 define i32 @ctlz_bsr(i32 %n) {
744 ; X86-LABEL: ctlz_bsr:
746 ; X86-NEXT: bsrl {{[0-9]+}}(%esp), %eax
749 ; X64-LABEL: ctlz_bsr:
751 ; X64-NEXT: bsrl %edi, %eax
754 ; X86-CLZ-LABEL: ctlz_bsr:
756 ; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
757 ; X86-CLZ-NEXT: xorl $31, %eax
760 ; X64-CLZ-LABEL: ctlz_bsr:
762 ; X64-CLZ-NEXT: lzcntl %edi, %eax
763 ; X64-CLZ-NEXT: xorl $31, %eax
765 %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
766 %bsr = xor i32 %ctlz, 31
770 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
771 ; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
772 ; codegen doesn't know how to combine the $32 and $31 into $63.
; Same ctlz-xor-31 pattern as ctlz_bsr, but with the i1 flag false. The
; X86/X64 checks currently expect two back-to-back xorl $31 after bsrl, and
; movl $32 followed by xorl $31 in the .LBB18_1 block. The +lzcnt checks
; expect lzcntl + xorl $31 with no branch.
773 define i32 @ctlz_bsr_zero_test(i32 %n) {
774 ; X86-LABEL: ctlz_bsr_zero_test:
776 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
777 ; X86-NEXT: testl %eax, %eax
778 ; X86-NEXT: je .LBB18_1
779 ; X86-NEXT: # %bb.2: # %cond.false
780 ; X86-NEXT: bsrl %eax, %eax
781 ; X86-NEXT: xorl $31, %eax
782 ; X86-NEXT: xorl $31, %eax
784 ; X86-NEXT: .LBB18_1:
785 ; X86-NEXT: movl $32, %eax
786 ; X86-NEXT: xorl $31, %eax
789 ; X64-LABEL: ctlz_bsr_zero_test:
791 ; X64-NEXT: testl %edi, %edi
792 ; X64-NEXT: je .LBB18_1
793 ; X64-NEXT: # %bb.2: # %cond.false
794 ; X64-NEXT: bsrl %edi, %eax
795 ; X64-NEXT: xorl $31, %eax
796 ; X64-NEXT: xorl $31, %eax
798 ; X64-NEXT: .LBB18_1:
799 ; X64-NEXT: movl $32, %eax
800 ; X64-NEXT: xorl $31, %eax
803 ; X86-CLZ-LABEL: ctlz_bsr_zero_test:
805 ; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
806 ; X86-CLZ-NEXT: xorl $31, %eax
809 ; X64-CLZ-LABEL: ctlz_bsr_zero_test:
811 ; X64-CLZ-NEXT: lzcntl %edi, %eax
812 ; X64-CLZ-NEXT: xorl $31, %eax
814 %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
815 %bsr = xor i32 %ctlz, 31
; Known-bits test for cttz: the checks show bit 1 forced on (orb $2) before
; the count, so the result can only be 0 or 1. No 'and' instruction appears
; in the visible expected output, i.e. the `and i8 %tmp, 1` in the IR is
; expected to be folded away.
819 define i8 @cttz_i8_knownbits(i8 %x) {
820 ; X86-LABEL: cttz_i8_knownbits:
822 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
823 ; X86-NEXT: orb $2, %al
824 ; X86-NEXT: movzbl %al, %eax
825 ; X86-NEXT: bsfl %eax, %eax
826 ; X86-NEXT: # kill: def $al killed $al killed $eax
829 ; X64-LABEL: cttz_i8_knownbits:
831 ; X64-NEXT: orb $2, %dil
832 ; X64-NEXT: movzbl %dil, %eax
833 ; X64-NEXT: bsfl %eax, %eax
834 ; X64-NEXT: # kill: def $al killed $al killed $eax
837 ; X86-CLZ-LABEL: cttz_i8_knownbits:
839 ; X86-CLZ-NEXT: movb {{[0-9]+}}(%esp), %al
840 ; X86-CLZ-NEXT: orb $2, %al
841 ; X86-CLZ-NEXT: movzbl %al, %eax
842 ; X86-CLZ-NEXT: tzcntl %eax, %eax
843 ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
846 ; X64-CLZ-LABEL: cttz_i8_knownbits:
848 ; X64-CLZ-NEXT: orb $2, %dil
849 ; X64-CLZ-NEXT: movzbl %dil, %eax
850 ; X64-CLZ-NEXT: tzcntl %eax, %eax
851 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
854 %tmp = call i8 @llvm.cttz.i8(i8 %x2, i1 true )
855 %tmp2 = and i8 %tmp, 1
; Known-bits test for ctlz: the checks show bit 6 forced on (orb $64) before
; the count, so the i8 leading-zero count can only be 0 or 1. No 'and'
; instruction appears in the visible expected output, i.e. the
; `and i8 %tmp, 1` in the IR is expected to be folded away.
859 define i8 @ctlz_i8_knownbits(i8 %x) {
860 ; X86-LABEL: ctlz_i8_knownbits:
862 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
863 ; X86-NEXT: orb $64, %al
864 ; X86-NEXT: movzbl %al, %eax
865 ; X86-NEXT: bsrl %eax, %eax
866 ; X86-NEXT: xorl $7, %eax
867 ; X86-NEXT: # kill: def $al killed $al killed $eax
870 ; X64-LABEL: ctlz_i8_knownbits:
872 ; X64-NEXT: orb $64, %dil
873 ; X64-NEXT: movzbl %dil, %eax
874 ; X64-NEXT: bsrl %eax, %eax
875 ; X64-NEXT: xorl $7, %eax
876 ; X64-NEXT: # kill: def $al killed $al killed $eax
879 ; X86-CLZ-LABEL: ctlz_i8_knownbits:
881 ; X86-CLZ-NEXT: movb {{[0-9]+}}(%esp), %al
882 ; X86-CLZ-NEXT: orb $64, %al
883 ; X86-CLZ-NEXT: movzbl %al, %eax
884 ; X86-CLZ-NEXT: lzcntl %eax, %eax
885 ; X86-CLZ-NEXT: addl $-24, %eax
886 ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
889 ; X64-CLZ-LABEL: ctlz_i8_knownbits:
891 ; X64-CLZ-NEXT: orb $64, %dil
892 ; X64-CLZ-NEXT: movzbl %dil, %eax
893 ; X64-CLZ-NEXT: lzcntl %eax, %eax
894 ; X64-CLZ-NEXT: addl $-24, %eax
895 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
899 %tmp = call i8 @llvm.ctlz.i8(i8 %x2, i1 true )
900 %tmp2 = and i8 %tmp, 1
904 ; Make sure we can detect that the input is non-zero and avoid cmov after BSR
905 ; This is relevant for 32-bit mode without lzcnt
; The checks show bit 0 forced on (orl/orq $1) ahead of the count. On i686
; no $63 substitution follows the bsrl of the OR'ed piece, and the +32
; adjustment is expected as orl $32 (ctlz_i64_zero_test expects addl $32
; there). x86-64 expects orq + bsrq + xorq $63 with no zero test.
906 define i64 @ctlz_i64_zero_test_knownneverzero(i64 %n) {
907 ; X86-NOCMOV-LABEL: ctlz_i64_zero_test_knownneverzero:
908 ; X86-NOCMOV: # %bb.0:
909 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
910 ; X86-NOCMOV-NEXT: testl %eax, %eax
911 ; X86-NOCMOV-NEXT: jne .LBB21_1
912 ; X86-NOCMOV-NEXT: # %bb.2:
913 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
914 ; X86-NOCMOV-NEXT: orl $1, %eax
915 ; X86-NOCMOV-NEXT: bsrl %eax, %eax
916 ; X86-NOCMOV-NEXT: xorl $31, %eax
917 ; X86-NOCMOV-NEXT: orl $32, %eax
918 ; X86-NOCMOV-NEXT: xorl %edx, %edx
919 ; X86-NOCMOV-NEXT: retl
920 ; X86-NOCMOV-NEXT: .LBB21_1:
921 ; X86-NOCMOV-NEXT: bsrl %eax, %eax
922 ; X86-NOCMOV-NEXT: xorl $31, %eax
923 ; X86-NOCMOV-NEXT: xorl %edx, %edx
924 ; X86-NOCMOV-NEXT: retl
926 ; X86-CMOV-LABEL: ctlz_i64_zero_test_knownneverzero:
928 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
929 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
930 ; X86-CMOV-NEXT: orl $1, %eax
931 ; X86-CMOV-NEXT: bsrl %ecx, %edx
932 ; X86-CMOV-NEXT: xorl $31, %edx
933 ; X86-CMOV-NEXT: bsrl %eax, %eax
934 ; X86-CMOV-NEXT: xorl $31, %eax
935 ; X86-CMOV-NEXT: orl $32, %eax
936 ; X86-CMOV-NEXT: testl %ecx, %ecx
937 ; X86-CMOV-NEXT: cmovnel %edx, %eax
938 ; X86-CMOV-NEXT: xorl %edx, %edx
939 ; X86-CMOV-NEXT: retl
941 ; X64-LABEL: ctlz_i64_zero_test_knownneverzero:
943 ; X64-NEXT: orq $1, %rdi
944 ; X64-NEXT: bsrq %rdi, %rax
945 ; X64-NEXT: xorq $63, %rax
948 ; X86-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
950 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
951 ; X86-CLZ-NEXT: testl %eax, %eax
952 ; X86-CLZ-NEXT: jne .LBB21_1
953 ; X86-CLZ-NEXT: # %bb.2:
954 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
955 ; X86-CLZ-NEXT: orl $1, %eax
956 ; X86-CLZ-NEXT: lzcntl %eax, %eax
957 ; X86-CLZ-NEXT: orl $32, %eax
958 ; X86-CLZ-NEXT: xorl %edx, %edx
960 ; X86-CLZ-NEXT: .LBB21_1:
961 ; X86-CLZ-NEXT: lzcntl %eax, %eax
962 ; X86-CLZ-NEXT: xorl %edx, %edx
965 ; X64-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
967 ; X64-CLZ-NEXT: orq $1, %rdi
968 ; X64-CLZ-NEXT: lzcntq %rdi, %rax
971 %tmp1 = call i64 @llvm.ctlz.i64(i64 %o, i1 false)
975 ; Make sure we can detect that the input is non-zero and avoid cmov after BSF
976 ; This is relevant for 32-bit mode without tzcnt
; The IR forces the top bit on (or with 0x8000000000000000) ahead of the
; count. On i686 no $32 substitution follows the bsfl of the OR'ed piece,
; and the +32 adjustment is expected as orl $32 (cttz_i64_zero_test expects
; addl $32 there). x86-64 expects movabsq + orq + bsfq/tzcntq with no zero
; test.
977 define i64 @cttz_i64_zero_test_knownneverzero(i64 %n) {
978 ; X86-NOCMOV-LABEL: cttz_i64_zero_test_knownneverzero:
979 ; X86-NOCMOV: # %bb.0:
980 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
981 ; X86-NOCMOV-NEXT: testl %eax, %eax
982 ; X86-NOCMOV-NEXT: jne .LBB22_1
983 ; X86-NOCMOV-NEXT: # %bb.2:
984 ; X86-NOCMOV-NEXT: movl $-2147483648, %eax # imm = 0x80000000
985 ; X86-NOCMOV-NEXT: orl {{[0-9]+}}(%esp), %eax
986 ; X86-NOCMOV-NEXT: bsfl %eax, %eax
987 ; X86-NOCMOV-NEXT: orl $32, %eax
988 ; X86-NOCMOV-NEXT: xorl %edx, %edx
989 ; X86-NOCMOV-NEXT: retl
990 ; X86-NOCMOV-NEXT: .LBB22_1:
991 ; X86-NOCMOV-NEXT: bsfl %eax, %eax
992 ; X86-NOCMOV-NEXT: xorl %edx, %edx
993 ; X86-NOCMOV-NEXT: retl
995 ; X86-CMOV-LABEL: cttz_i64_zero_test_knownneverzero:
997 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
998 ; X86-CMOV-NEXT: movl $-2147483648, %eax # imm = 0x80000000
999 ; X86-CMOV-NEXT: orl {{[0-9]+}}(%esp), %eax
1000 ; X86-CMOV-NEXT: bsfl %ecx, %edx
1001 ; X86-CMOV-NEXT: bsfl %eax, %eax
1002 ; X86-CMOV-NEXT: orl $32, %eax
1003 ; X86-CMOV-NEXT: testl %ecx, %ecx
1004 ; X86-CMOV-NEXT: cmovnel %edx, %eax
1005 ; X86-CMOV-NEXT: xorl %edx, %edx
1006 ; X86-CMOV-NEXT: retl
1008 ; X64-LABEL: cttz_i64_zero_test_knownneverzero:
1010 ; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
1011 ; X64-NEXT: orq %rdi, %rax
1012 ; X64-NEXT: bsfq %rax, %rax
1015 ; X86-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
1017 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
1018 ; X86-CLZ-NEXT: testl %eax, %eax
1019 ; X86-CLZ-NEXT: jne .LBB22_1
1020 ; X86-CLZ-NEXT: # %bb.2:
1021 ; X86-CLZ-NEXT: movl $-2147483648, %eax # imm = 0x80000000
1022 ; X86-CLZ-NEXT: orl {{[0-9]+}}(%esp), %eax
1023 ; X86-CLZ-NEXT: tzcntl %eax, %eax
1024 ; X86-CLZ-NEXT: orl $32, %eax
1025 ; X86-CLZ-NEXT: xorl %edx, %edx
1026 ; X86-CLZ-NEXT: retl
1027 ; X86-CLZ-NEXT: .LBB22_1:
1028 ; X86-CLZ-NEXT: tzcntl %eax, %eax
1029 ; X86-CLZ-NEXT: xorl %edx, %edx
1030 ; X86-CLZ-NEXT: retl
1032 ; X64-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
1034 ; X64-CLZ-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
1035 ; X64-CLZ-NEXT: orq %rdi, %rax
1036 ; X64-CLZ-NEXT: tzcntq %rax, %rax
1037 ; X64-CLZ-NEXT: retq
1038 %o = or i64 %n, -9223372036854775808 ; 0x8000000000000000
1039 %tmp1 = call i64 @llvm.cttz.i64(i64 %o, i1 false)
1043 ; Ensure we fold away the XOR(TRUNC(XOR(BSR(X),31)),31).
; The X86/X64 checks expect a lone bsrl with the i8 result read from %al.
; The +lzcnt checks expect lzcntl followed by a single xorb $31 on %al.
1044 define i8 @PR47603_trunc(i32 %0) {
1045 ; X86-LABEL: PR47603_trunc:
1047 ; X86-NEXT: bsrl {{[0-9]+}}(%esp), %eax
1048 ; X86-NEXT: # kill: def $al killed $al killed $eax
1051 ; X64-LABEL: PR47603_trunc:
1053 ; X64-NEXT: bsrl %edi, %eax
1054 ; X64-NEXT: # kill: def $al killed $al killed $eax
1057 ; X86-CLZ-LABEL: PR47603_trunc:
1059 ; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
1060 ; X86-CLZ-NEXT: xorb $31, %al
1061 ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
1062 ; X86-CLZ-NEXT: retl
1064 ; X64-CLZ-LABEL: PR47603_trunc:
1066 ; X64-CLZ-NEXT: lzcntl %edi, %eax
1067 ; X64-CLZ-NEXT: xorb $31, %al
1068 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
1069 ; X64-CLZ-NEXT: retq
1070 %2 = call i32 @llvm.ctlz.i32(i32 %0, i1 true)
1072 %4 = trunc i32 %3 to i8
1076 ; Ensure we fold away the XOR(ZEXT(XOR(BSR(X),31)),31).
1077 define i32 @PR47603_zext(i32 %a0, [32 x i8]* %a1) {
1078 ; X86-LABEL: PR47603_zext:
1080 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1081 ; X86-NEXT: bsrl {{[0-9]+}}(%esp), %ecx
1082 ; X86-NEXT: movsbl (%eax,%ecx), %eax
1085 ; X64-LABEL: PR47603_zext:
1087 ; X64-NEXT: bsrl %edi, %eax
1088 ; X64-NEXT: movsbl (%rsi,%rax), %eax
1091 ; X86-CLZ-LABEL: PR47603_zext:
1093 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
1094 ; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %ecx
1095 ; X86-CLZ-NEXT: xorl $31, %ecx
1096 ; X86-CLZ-NEXT: movsbl (%eax,%ecx), %eax
1097 ; X86-CLZ-NEXT: retl
1099 ; X64-CLZ-LABEL: PR47603_zext:
1101 ; X64-CLZ-NEXT: lzcntl %edi, %eax
1102 ; X64-CLZ-NEXT: xorq $31, %rax
1103 ; X64-CLZ-NEXT: movsbl (%rsi,%rax), %eax
1104 ; X64-CLZ-NEXT: retq
1105 %ctlz = tail call i32 @llvm.ctlz.i32(i32 %a0, i1 true)
1106 %xor = xor i32 %ctlz, 31
1107 %zext = zext i32 %xor to i64
1108 %gep = getelementptr inbounds [32 x i8], [32 x i8]* %a1, i64 0, i64 %zext
1109 %load = load i8, i8* %gep, align 1
1110 %sext = sext i8 %load to i32