1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86,X86-NOCMOV
3 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov | FileCheck %s --check-prefixes=X86,X86-CMOV
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
5 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=X86-CLZ
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=X64-CLZ
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt,+fast-lzcnt | FileCheck %s --check-prefix=X64-FASTLZCNT
8 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt,+fast-lzcnt | FileCheck %s --check-prefix=X86-FASTLZCNT
; Declarations of the cttz (count trailing zeros) and ctlz (count leading
; zeros) intrinsics for i8, i16, i32 and i64. Each takes the value plus an i1
; flag; the calls in this file pass either `true` or `false` for that flag.
10 declare i8 @llvm.cttz.i8(i8, i1)
11 declare i16 @llvm.cttz.i16(i16, i1)
12 declare i32 @llvm.cttz.i32(i32, i1)
13 declare i64 @llvm.cttz.i64(i64, i1)
15 declare i8 @llvm.ctlz.i8(i8, i1)
16 declare i16 @llvm.ctlz.i16(i16, i1)
17 declare i32 @llvm.ctlz.i32(i32, i1)
18 declare i64 @llvm.ctlz.i64(i64, i1)
; cttz on i8 with the flag operand set to `i1 true` (per the LangRef a zero
; input then yields poison, so no zero guard is expected).
; All configurations count on a 32-bit register and return the low byte
; (%al): `rep bsfl` without BMI, `tzcntl` with +bmi.
20 define i8 @cttz_i8(i8 %x) {
23 ; X86-NEXT: rep bsfl {{[0-9]+}}(%esp), %eax
24 ; X86-NEXT: # kill: def $al killed $al killed $eax
29 ; X64-NEXT: rep bsfl %edi, %eax
30 ; X64-NEXT: # kill: def $al killed $al killed $eax
33 ; X86-CLZ-LABEL: cttz_i8:
35 ; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
36 ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
39 ; X64-CLZ-LABEL: cttz_i8:
41 ; X64-CLZ-NEXT: tzcntl %edi, %eax
42 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
45 ; X64-FASTLZCNT-LABEL: cttz_i8:
46 ; X64-FASTLZCNT: # %bb.0:
47 ; X64-FASTLZCNT-NEXT: tzcntl %edi, %eax
48 ; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
49 ; X64-FASTLZCNT-NEXT: retq
51 ; X86-FASTLZCNT-LABEL: cttz_i8:
52 ; X86-FASTLZCNT: # %bb.0:
53 ; X86-FASTLZCNT-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
54 ; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
55 ; X86-FASTLZCNT-NEXT: retl
56 %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
; cttz on i16 with the flag operand set to `i1 true`.
; As in the i8 case the count is done on a 32-bit register (`rep bsfl`
; without BMI, `tzcntl` with +bmi) and the low 16 bits (%ax) are returned.
60 define i16 @cttz_i16(i16 %x) {
61 ; X86-LABEL: cttz_i16:
63 ; X86-NEXT: rep bsfl {{[0-9]+}}(%esp), %eax
64 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
67 ; X64-LABEL: cttz_i16:
69 ; X64-NEXT: rep bsfl %edi, %eax
70 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
73 ; X86-CLZ-LABEL: cttz_i16:
75 ; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
76 ; X86-CLZ-NEXT: # kill: def $ax killed $ax killed $eax
79 ; X64-CLZ-LABEL: cttz_i16:
81 ; X64-CLZ-NEXT: tzcntl %edi, %eax
82 ; X64-CLZ-NEXT: # kill: def $ax killed $ax killed $eax
85 ; X64-FASTLZCNT-LABEL: cttz_i16:
86 ; X64-FASTLZCNT: # %bb.0:
87 ; X64-FASTLZCNT-NEXT: tzcntl %edi, %eax
88 ; X64-FASTLZCNT-NEXT: # kill: def $ax killed $ax killed $eax
89 ; X64-FASTLZCNT-NEXT: retq
91 ; X86-FASTLZCNT-LABEL: cttz_i16:
92 ; X86-FASTLZCNT: # %bb.0:
93 ; X86-FASTLZCNT-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
94 ; X86-FASTLZCNT-NEXT: # kill: def $ax killed $ax killed $eax
95 ; X86-FASTLZCNT-NEXT: retl
96 %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
; cttz on i32 with the flag operand set to `i1 true`.
; A single instruction is expected: `rep bsfl` without BMI, `tzcntl` with
; +bmi. On i686 the operand is read straight from the stack.
100 define i32 @cttz_i32(i32 %x) {
101 ; X86-LABEL: cttz_i32:
103 ; X86-NEXT: rep bsfl {{[0-9]+}}(%esp), %eax
106 ; X64-LABEL: cttz_i32:
108 ; X64-NEXT: rep bsfl %edi, %eax
111 ; X86-CLZ-LABEL: cttz_i32:
113 ; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
116 ; X64-CLZ-LABEL: cttz_i32:
118 ; X64-CLZ-NEXT: tzcntl %edi, %eax
121 ; X64-FASTLZCNT-LABEL: cttz_i32:
122 ; X64-FASTLZCNT: # %bb.0:
123 ; X64-FASTLZCNT-NEXT: tzcntl %edi, %eax
124 ; X64-FASTLZCNT-NEXT: retq
126 ; X86-FASTLZCNT-LABEL: cttz_i32:
127 ; X86-FASTLZCNT: # %bb.0:
128 ; X86-FASTLZCNT-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
129 ; X86-FASTLZCNT-NEXT: retl
130 %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
; cttz on i64 with the flag operand set to `i1 true`.
; x86-64 is expected to use one 64-bit instruction (`rep bsfq`, or `tzcntq`
; with +bmi).
; i686 works on two 32-bit halves loaded from the stack: the half loaded first
; is tested for zero; if it is non-zero its count is the result, otherwise the
; count of the other half plus 32 is used. With +cmov (and no BMI) both counts
; are computed and `cmovnel` selects one; the other i686 configurations are
; expected to branch with `jne`.
; %edx is cleared before every i686 return (i64 is returned in %edx:%eax there,
; and the count always fits in the low half).
134 define i64 @cttz_i64(i64 %x) {
135 ; X86-NOCMOV-LABEL: cttz_i64:
136 ; X86-NOCMOV: # %bb.0:
137 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
138 ; X86-NOCMOV-NEXT: testl %eax, %eax
139 ; X86-NOCMOV-NEXT: jne .LBB3_1
140 ; X86-NOCMOV-NEXT: # %bb.2:
141 ; X86-NOCMOV-NEXT: rep bsfl {{[0-9]+}}(%esp), %eax
142 ; X86-NOCMOV-NEXT: addl $32, %eax
143 ; X86-NOCMOV-NEXT: xorl %edx, %edx
144 ; X86-NOCMOV-NEXT: retl
145 ; X86-NOCMOV-NEXT: .LBB3_1:
146 ; X86-NOCMOV-NEXT: rep bsfl %eax, %eax
147 ; X86-NOCMOV-NEXT: xorl %edx, %edx
148 ; X86-NOCMOV-NEXT: retl
150 ; X86-CMOV-LABEL: cttz_i64:
152 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
153 ; X86-CMOV-NEXT: rep bsfl %ecx, %edx
154 ; X86-CMOV-NEXT: rep bsfl {{[0-9]+}}(%esp), %eax
155 ; X86-CMOV-NEXT: addl $32, %eax
156 ; X86-CMOV-NEXT: testl %ecx, %ecx
157 ; X86-CMOV-NEXT: cmovnel %edx, %eax
158 ; X86-CMOV-NEXT: xorl %edx, %edx
159 ; X86-CMOV-NEXT: retl
161 ; X64-LABEL: cttz_i64:
163 ; X64-NEXT: rep bsfq %rdi, %rax
166 ; X86-CLZ-LABEL: cttz_i64:
168 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
169 ; X86-CLZ-NEXT: testl %eax, %eax
170 ; X86-CLZ-NEXT: jne .LBB3_1
171 ; X86-CLZ-NEXT: # %bb.2:
172 ; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
173 ; X86-CLZ-NEXT: addl $32, %eax
174 ; X86-CLZ-NEXT: xorl %edx, %edx
176 ; X86-CLZ-NEXT: .LBB3_1:
177 ; X86-CLZ-NEXT: tzcntl %eax, %eax
178 ; X86-CLZ-NEXT: xorl %edx, %edx
181 ; X64-CLZ-LABEL: cttz_i64:
183 ; X64-CLZ-NEXT: tzcntq %rdi, %rax
186 ; X64-FASTLZCNT-LABEL: cttz_i64:
187 ; X64-FASTLZCNT: # %bb.0:
188 ; X64-FASTLZCNT-NEXT: tzcntq %rdi, %rax
189 ; X64-FASTLZCNT-NEXT: retq
191 ; X86-FASTLZCNT-LABEL: cttz_i64:
192 ; X86-FASTLZCNT: # %bb.0:
193 ; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
194 ; X86-FASTLZCNT-NEXT: testl %eax, %eax
195 ; X86-FASTLZCNT-NEXT: jne .LBB3_1
196 ; X86-FASTLZCNT-NEXT: # %bb.2:
197 ; X86-FASTLZCNT-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
198 ; X86-FASTLZCNT-NEXT: addl $32, %eax
199 ; X86-FASTLZCNT-NEXT: xorl %edx, %edx
200 ; X86-FASTLZCNT-NEXT: retl
201 ; X86-FASTLZCNT-NEXT: .LBB3_1:
202 ; X86-FASTLZCNT-NEXT: tzcntl %eax, %eax
203 ; X86-FASTLZCNT-NEXT: xorl %edx, %edx
204 ; X86-FASTLZCNT-NEXT: retl
205 %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
; ctlz on i8 with the flag operand set to `i1 true`.
; The byte is zero-extended to 32 bits with `movzbl` first.
; Without LZCNT the expected sequence is `bsrl` (index of the highest set bit)
; followed by `xorl $7`, which converts that index into a leading-zero count
; for an 8-bit value. With +lzcnt, `lzcntl` counts over all 32 bits, so
; `addl $-24` removes the 24 leading zeros introduced by the extension.
209 define i8 @ctlz_i8(i8 %x) {
210 ; X86-LABEL: ctlz_i8:
212 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
213 ; X86-NEXT: bsrl %eax, %eax
214 ; X86-NEXT: xorl $7, %eax
215 ; X86-NEXT: # kill: def $al killed $al killed $eax
218 ; X64-LABEL: ctlz_i8:
220 ; X64-NEXT: movzbl %dil, %eax
221 ; X64-NEXT: bsrl %eax, %eax
222 ; X64-NEXT: xorl $7, %eax
223 ; X64-NEXT: # kill: def $al killed $al killed $eax
226 ; X86-CLZ-LABEL: ctlz_i8:
228 ; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
229 ; X86-CLZ-NEXT: lzcntl %eax, %eax
230 ; X86-CLZ-NEXT: addl $-24, %eax
231 ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
234 ; X64-CLZ-LABEL: ctlz_i8:
236 ; X64-CLZ-NEXT: movzbl %dil, %eax
237 ; X64-CLZ-NEXT: lzcntl %eax, %eax
238 ; X64-CLZ-NEXT: addl $-24, %eax
239 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
242 ; X64-FASTLZCNT-LABEL: ctlz_i8:
243 ; X64-FASTLZCNT: # %bb.0:
244 ; X64-FASTLZCNT-NEXT: movzbl %dil, %eax
245 ; X64-FASTLZCNT-NEXT: lzcntl %eax, %eax
246 ; X64-FASTLZCNT-NEXT: addl $-24, %eax
247 ; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
248 ; X64-FASTLZCNT-NEXT: retq
250 ; X86-FASTLZCNT-LABEL: ctlz_i8:
251 ; X86-FASTLZCNT: # %bb.0:
252 ; X86-FASTLZCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax
253 ; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
254 ; X86-FASTLZCNT-NEXT: addl $-24, %eax
255 ; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
256 ; X86-FASTLZCNT-NEXT: retl
257 %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
; ctlz on i16 with the flag operand set to `i1 true`.
; Without LZCNT the expected code is a 16-bit `bsrw` followed by `xorl $15`
; (bit index -> leading-zero count for a 16-bit value). With +lzcnt a single
; 16-bit `lzcntw` is expected and no adjustment is needed.
261 define i16 @ctlz_i16(i16 %x) {
262 ; X86-LABEL: ctlz_i16:
264 ; X86-NEXT: bsrw {{[0-9]+}}(%esp), %ax
265 ; X86-NEXT: xorl $15, %eax
266 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
269 ; X64-LABEL: ctlz_i16:
271 ; X64-NEXT: bsrw %di, %ax
272 ; X64-NEXT: xorl $15, %eax
273 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
276 ; X86-CLZ-LABEL: ctlz_i16:
278 ; X86-CLZ-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
281 ; X64-CLZ-LABEL: ctlz_i16:
283 ; X64-CLZ-NEXT: lzcntw %di, %ax
286 ; X64-FASTLZCNT-LABEL: ctlz_i16:
287 ; X64-FASTLZCNT: # %bb.0:
288 ; X64-FASTLZCNT-NEXT: lzcntw %di, %ax
289 ; X64-FASTLZCNT-NEXT: retq
291 ; X86-FASTLZCNT-LABEL: ctlz_i16:
292 ; X86-FASTLZCNT: # %bb.0:
293 ; X86-FASTLZCNT-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
294 ; X86-FASTLZCNT-NEXT: retl
295 %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
; ctlz on i32 with the flag operand set to `i1 true`.
; Without LZCNT the expected code is `bsrl` followed by `xorl $31` (bit index
; -> leading-zero count). With +lzcnt a single `lzcntl` is expected.
299 define i32 @ctlz_i32(i32 %x) {
300 ; X86-LABEL: ctlz_i32:
302 ; X86-NEXT: bsrl {{[0-9]+}}(%esp), %eax
303 ; X86-NEXT: xorl $31, %eax
306 ; X64-LABEL: ctlz_i32:
308 ; X64-NEXT: bsrl %edi, %eax
309 ; X64-NEXT: xorl $31, %eax
312 ; X86-CLZ-LABEL: ctlz_i32:
314 ; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
317 ; X64-CLZ-LABEL: ctlz_i32:
319 ; X64-CLZ-NEXT: lzcntl %edi, %eax
322 ; X64-FASTLZCNT-LABEL: ctlz_i32:
323 ; X64-FASTLZCNT: # %bb.0:
324 ; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax
325 ; X64-FASTLZCNT-NEXT: retq
327 ; X86-FASTLZCNT-LABEL: ctlz_i32:
328 ; X86-FASTLZCNT: # %bb.0:
329 ; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
330 ; X86-FASTLZCNT-NEXT: retl
331 %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
; ctlz on i64 with the flag operand set to `i1 true`.
; x86-64 is expected to use `bsrq` + `xorq $63`, or a single `lzcntq` with
; +lzcnt.
; i686 works on two 32-bit halves loaded from the stack: the half loaded first
; is tested for zero; if it is non-zero its count is the result, otherwise the
; count of the other half plus 32 is used. Without LZCNT each per-half count is
; `bsrl` + `xorl $31`. With +cmov (and no LZCNT) both counts are computed and
; `cmovnel` selects one; the other i686 configurations branch with `jne`.
; %edx is cleared before every i686 return (i64 is returned in %edx:%eax there,
; and the count always fits in the low half).
335 define i64 @ctlz_i64(i64 %x) {
336 ; X86-NOCMOV-LABEL: ctlz_i64:
337 ; X86-NOCMOV: # %bb.0:
338 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
339 ; X86-NOCMOV-NEXT: testl %eax, %eax
340 ; X86-NOCMOV-NEXT: jne .LBB7_1
341 ; X86-NOCMOV-NEXT: # %bb.2:
342 ; X86-NOCMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
343 ; X86-NOCMOV-NEXT: xorl $31, %eax
344 ; X86-NOCMOV-NEXT: addl $32, %eax
345 ; X86-NOCMOV-NEXT: xorl %edx, %edx
346 ; X86-NOCMOV-NEXT: retl
347 ; X86-NOCMOV-NEXT: .LBB7_1:
348 ; X86-NOCMOV-NEXT: bsrl %eax, %eax
349 ; X86-NOCMOV-NEXT: xorl $31, %eax
350 ; X86-NOCMOV-NEXT: xorl %edx, %edx
351 ; X86-NOCMOV-NEXT: retl
353 ; X86-CMOV-LABEL: ctlz_i64:
355 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
356 ; X86-CMOV-NEXT: bsrl %ecx, %edx
357 ; X86-CMOV-NEXT: xorl $31, %edx
358 ; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
359 ; X86-CMOV-NEXT: xorl $31, %eax
360 ; X86-CMOV-NEXT: addl $32, %eax
361 ; X86-CMOV-NEXT: testl %ecx, %ecx
362 ; X86-CMOV-NEXT: cmovnel %edx, %eax
363 ; X86-CMOV-NEXT: xorl %edx, %edx
364 ; X86-CMOV-NEXT: retl
366 ; X64-LABEL: ctlz_i64:
368 ; X64-NEXT: bsrq %rdi, %rax
369 ; X64-NEXT: xorq $63, %rax
372 ; X86-CLZ-LABEL: ctlz_i64:
374 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
375 ; X86-CLZ-NEXT: testl %eax, %eax
376 ; X86-CLZ-NEXT: jne .LBB7_1
377 ; X86-CLZ-NEXT: # %bb.2:
378 ; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
379 ; X86-CLZ-NEXT: addl $32, %eax
380 ; X86-CLZ-NEXT: xorl %edx, %edx
382 ; X86-CLZ-NEXT: .LBB7_1:
383 ; X86-CLZ-NEXT: lzcntl %eax, %eax
384 ; X86-CLZ-NEXT: xorl %edx, %edx
387 ; X64-CLZ-LABEL: ctlz_i64:
389 ; X64-CLZ-NEXT: lzcntq %rdi, %rax
392 ; X64-FASTLZCNT-LABEL: ctlz_i64:
393 ; X64-FASTLZCNT: # %bb.0:
394 ; X64-FASTLZCNT-NEXT: lzcntq %rdi, %rax
395 ; X64-FASTLZCNT-NEXT: retq
397 ; X86-FASTLZCNT-LABEL: ctlz_i64:
398 ; X86-FASTLZCNT: # %bb.0:
399 ; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
400 ; X86-FASTLZCNT-NEXT: testl %eax, %eax
401 ; X86-FASTLZCNT-NEXT: jne .LBB7_1
402 ; X86-FASTLZCNT-NEXT: # %bb.2:
403 ; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
404 ; X86-FASTLZCNT-NEXT: addl $32, %eax
405 ; X86-FASTLZCNT-NEXT: xorl %edx, %edx
406 ; X86-FASTLZCNT-NEXT: retl
407 ; X86-FASTLZCNT-NEXT: .LBB7_1:
408 ; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
409 ; X86-FASTLZCNT-NEXT: xorl %edx, %edx
410 ; X86-FASTLZCNT-NEXT: retl
411 %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
415 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; The call passes `i1 false`, so a zero input must give a defined result.
; Without LZCNT the checks expect a `testb`/`je` guard ahead of the
; `movzbl` + `bsrl` + `xorl $7` sequence, and `movb $8` as the zero-input
; result. With +lzcnt no guard is expected: `movzbl`, `lzcntl`, `addl $-24`.
416 define i8 @ctlz_i8_zero_test(i8 %n) {
417 ; X86-LABEL: ctlz_i8_zero_test:
419 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
420 ; X86-NEXT: testb %al, %al
421 ; X86-NEXT: je .LBB8_1
422 ; X86-NEXT: # %bb.2: # %cond.false
423 ; X86-NEXT: movzbl %al, %eax
424 ; X86-NEXT: bsrl %eax, %eax
425 ; X86-NEXT: xorl $7, %eax
426 ; X86-NEXT: # kill: def $al killed $al killed $eax
429 ; X86-NEXT: movb $8, %al
430 ; X86-NEXT: # kill: def $al killed $al killed $eax
433 ; X64-LABEL: ctlz_i8_zero_test:
435 ; X64-NEXT: testb %dil, %dil
436 ; X64-NEXT: je .LBB8_1
437 ; X64-NEXT: # %bb.2: # %cond.false
438 ; X64-NEXT: movzbl %dil, %eax
439 ; X64-NEXT: bsrl %eax, %eax
440 ; X64-NEXT: xorl $7, %eax
441 ; X64-NEXT: # kill: def $al killed $al killed $eax
444 ; X64-NEXT: movb $8, %al
445 ; X64-NEXT: # kill: def $al killed $al killed $eax
448 ; X86-CLZ-LABEL: ctlz_i8_zero_test:
450 ; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
451 ; X86-CLZ-NEXT: lzcntl %eax, %eax
452 ; X86-CLZ-NEXT: addl $-24, %eax
453 ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
456 ; X64-CLZ-LABEL: ctlz_i8_zero_test:
458 ; X64-CLZ-NEXT: movzbl %dil, %eax
459 ; X64-CLZ-NEXT: lzcntl %eax, %eax
460 ; X64-CLZ-NEXT: addl $-24, %eax
461 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
464 ; X64-FASTLZCNT-LABEL: ctlz_i8_zero_test:
465 ; X64-FASTLZCNT: # %bb.0:
466 ; X64-FASTLZCNT-NEXT: movzbl %dil, %eax
467 ; X64-FASTLZCNT-NEXT: lzcntl %eax, %eax
468 ; X64-FASTLZCNT-NEXT: addl $-24, %eax
469 ; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
470 ; X64-FASTLZCNT-NEXT: retq
472 ; X86-FASTLZCNT-LABEL: ctlz_i8_zero_test:
473 ; X86-FASTLZCNT: # %bb.0:
474 ; X86-FASTLZCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax
475 ; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
476 ; X86-FASTLZCNT-NEXT: addl $-24, %eax
477 ; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
478 ; X86-FASTLZCNT-NEXT: retl
479 %tmp1 = call i8 @llvm.ctlz.i8(i8 %n, i1 false)
483 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; The call passes `i1 false`, so a zero input must give a defined result.
; Without LZCNT the checks expect a `testw`/`je` guard ahead of `bsrw` +
; `xorl $15`, and `movw $16` as the zero-input result. With +lzcnt a single
; `lzcntw` is expected and no guard is needed.
484 define i16 @ctlz_i16_zero_test(i16 %n) {
485 ; X86-LABEL: ctlz_i16_zero_test:
487 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
488 ; X86-NEXT: testw %ax, %ax
489 ; X86-NEXT: je .LBB9_1
490 ; X86-NEXT: # %bb.2: # %cond.false
491 ; X86-NEXT: bsrw %ax, %ax
492 ; X86-NEXT: xorl $15, %eax
493 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
496 ; X86-NEXT: movw $16, %ax
497 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
500 ; X64-LABEL: ctlz_i16_zero_test:
502 ; X64-NEXT: testw %di, %di
503 ; X64-NEXT: je .LBB9_1
504 ; X64-NEXT: # %bb.2: # %cond.false
505 ; X64-NEXT: bsrw %di, %ax
506 ; X64-NEXT: xorl $15, %eax
507 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
510 ; X64-NEXT: movw $16, %ax
511 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
514 ; X86-CLZ-LABEL: ctlz_i16_zero_test:
516 ; X86-CLZ-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
519 ; X64-CLZ-LABEL: ctlz_i16_zero_test:
521 ; X64-CLZ-NEXT: lzcntw %di, %ax
524 ; X64-FASTLZCNT-LABEL: ctlz_i16_zero_test:
525 ; X64-FASTLZCNT: # %bb.0:
526 ; X64-FASTLZCNT-NEXT: lzcntw %di, %ax
527 ; X64-FASTLZCNT-NEXT: retq
529 ; X86-FASTLZCNT-LABEL: ctlz_i16_zero_test:
530 ; X86-FASTLZCNT: # %bb.0:
531 ; X86-FASTLZCNT-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
532 ; X86-FASTLZCNT-NEXT: retl
533 %tmp1 = call i16 @llvm.ctlz.i16(i16 %n, i1 false)
537 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; The call passes `i1 false`, so a zero input must give a defined result.
; Without LZCNT the checks expect a `testl`/`je` guard ahead of `bsrl` +
; `xorl $31`, and `movl $32` as the zero-input result. With +lzcnt a single
; `lzcntl` is expected and no guard is needed.
538 define i32 @ctlz_i32_zero_test(i32 %n) {
539 ; X86-LABEL: ctlz_i32_zero_test:
541 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
542 ; X86-NEXT: testl %eax, %eax
543 ; X86-NEXT: je .LBB10_1
544 ; X86-NEXT: # %bb.2: # %cond.false
545 ; X86-NEXT: bsrl %eax, %eax
546 ; X86-NEXT: xorl $31, %eax
548 ; X86-NEXT: .LBB10_1:
549 ; X86-NEXT: movl $32, %eax
552 ; X64-LABEL: ctlz_i32_zero_test:
554 ; X64-NEXT: testl %edi, %edi
555 ; X64-NEXT: je .LBB10_1
556 ; X64-NEXT: # %bb.2: # %cond.false
557 ; X64-NEXT: bsrl %edi, %eax
558 ; X64-NEXT: xorl $31, %eax
560 ; X64-NEXT: .LBB10_1:
561 ; X64-NEXT: movl $32, %eax
564 ; X86-CLZ-LABEL: ctlz_i32_zero_test:
566 ; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
569 ; X64-CLZ-LABEL: ctlz_i32_zero_test:
571 ; X64-CLZ-NEXT: lzcntl %edi, %eax
574 ; X64-FASTLZCNT-LABEL: ctlz_i32_zero_test:
575 ; X64-FASTLZCNT: # %bb.0:
576 ; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax
577 ; X64-FASTLZCNT-NEXT: retq
579 ; X86-FASTLZCNT-LABEL: ctlz_i32_zero_test:
580 ; X86-FASTLZCNT: # %bb.0:
581 ; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
582 ; X86-FASTLZCNT-NEXT: retl
583 %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
587 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; The call passes `i1 false`, so a zero input must give a defined result.
; The test-and-branch shape described above is what x86-64 without LZCNT is
; expected to emit: `testq`/`je`, then `bsrq` + `xorq $63`, with `movl $64`
; on the zero path. With +lzcnt x86-64 uses a single `lzcntq`.
; i686 without LZCNT has no up-front guard on the whole value. `bsrl` is
; applied to one half and, when that half is zero (ZF set by `bsrl`), the
; constant 63 is substituted; 63 ^ 31 = 32, and adding 32 gives 64 when the
; other half is zero as well. With +cmov the substitution and the final select
; are done with `cmovnel`/`cmovel`; without it, with `je`/`jne` branches.
; i686 with +lzcnt tests the first-loaded half and uses `lzcntl` on either
; that half or the other half plus 32.
588 define i64 @ctlz_i64_zero_test(i64 %n) {
589 ; X86-NOCMOV-LABEL: ctlz_i64_zero_test:
590 ; X86-NOCMOV: # %bb.0:
591 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
592 ; X86-NOCMOV-NEXT: bsrl {{[0-9]+}}(%esp), %edx
593 ; X86-NOCMOV-NEXT: movl $63, %eax
594 ; X86-NOCMOV-NEXT: je .LBB11_2
595 ; X86-NOCMOV-NEXT: # %bb.1:
596 ; X86-NOCMOV-NEXT: movl %edx, %eax
597 ; X86-NOCMOV-NEXT: .LBB11_2:
598 ; X86-NOCMOV-NEXT: testl %ecx, %ecx
599 ; X86-NOCMOV-NEXT: jne .LBB11_3
600 ; X86-NOCMOV-NEXT: # %bb.4:
601 ; X86-NOCMOV-NEXT: xorl $31, %eax
602 ; X86-NOCMOV-NEXT: addl $32, %eax
603 ; X86-NOCMOV-NEXT: xorl %edx, %edx
604 ; X86-NOCMOV-NEXT: retl
605 ; X86-NOCMOV-NEXT: .LBB11_3:
606 ; X86-NOCMOV-NEXT: bsrl %ecx, %eax
607 ; X86-NOCMOV-NEXT: xorl $31, %eax
608 ; X86-NOCMOV-NEXT: xorl %edx, %edx
609 ; X86-NOCMOV-NEXT: retl
611 ; X86-CMOV-LABEL: ctlz_i64_zero_test:
613 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
614 ; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
615 ; X86-CMOV-NEXT: movl $63, %edx
616 ; X86-CMOV-NEXT: cmovnel %eax, %edx
617 ; X86-CMOV-NEXT: xorl $31, %edx
618 ; X86-CMOV-NEXT: addl $32, %edx
619 ; X86-CMOV-NEXT: bsrl %ecx, %eax
620 ; X86-CMOV-NEXT: xorl $31, %eax
621 ; X86-CMOV-NEXT: testl %ecx, %ecx
622 ; X86-CMOV-NEXT: cmovel %edx, %eax
623 ; X86-CMOV-NEXT: xorl %edx, %edx
624 ; X86-CMOV-NEXT: retl
626 ; X64-LABEL: ctlz_i64_zero_test:
628 ; X64-NEXT: testq %rdi, %rdi
629 ; X64-NEXT: je .LBB11_1
630 ; X64-NEXT: # %bb.2: # %cond.false
631 ; X64-NEXT: bsrq %rdi, %rax
632 ; X64-NEXT: xorq $63, %rax
634 ; X64-NEXT: .LBB11_1:
635 ; X64-NEXT: movl $64, %eax
638 ; X86-CLZ-LABEL: ctlz_i64_zero_test:
640 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
641 ; X86-CLZ-NEXT: testl %eax, %eax
642 ; X86-CLZ-NEXT: jne .LBB11_1
643 ; X86-CLZ-NEXT: # %bb.2:
644 ; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
645 ; X86-CLZ-NEXT: addl $32, %eax
646 ; X86-CLZ-NEXT: xorl %edx, %edx
648 ; X86-CLZ-NEXT: .LBB11_1:
649 ; X86-CLZ-NEXT: lzcntl %eax, %eax
650 ; X86-CLZ-NEXT: xorl %edx, %edx
653 ; X64-CLZ-LABEL: ctlz_i64_zero_test:
655 ; X64-CLZ-NEXT: lzcntq %rdi, %rax
658 ; X64-FASTLZCNT-LABEL: ctlz_i64_zero_test:
659 ; X64-FASTLZCNT: # %bb.0:
660 ; X64-FASTLZCNT-NEXT: lzcntq %rdi, %rax
661 ; X64-FASTLZCNT-NEXT: retq
663 ; X86-FASTLZCNT-LABEL: ctlz_i64_zero_test:
664 ; X86-FASTLZCNT: # %bb.0:
665 ; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
666 ; X86-FASTLZCNT-NEXT: testl %eax, %eax
667 ; X86-FASTLZCNT-NEXT: jne .LBB11_1
668 ; X86-FASTLZCNT-NEXT: # %bb.2:
669 ; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
670 ; X86-FASTLZCNT-NEXT: addl $32, %eax
671 ; X86-FASTLZCNT-NEXT: xorl %edx, %edx
672 ; X86-FASTLZCNT-NEXT: retl
673 ; X86-FASTLZCNT-NEXT: .LBB11_1:
674 ; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
675 ; X86-FASTLZCNT-NEXT: xorl %edx, %edx
676 ; X86-FASTLZCNT-NEXT: retl
677 %tmp1 = call i64 @llvm.ctlz.i64(i64 %n, i1 false)
681 ; Promote i8 cttz to i32 and set bit 8 (OR with 0x100) to prevent the (slow) zero-src bsf case.
; The call passes `i1 false`. Because bit 8 is always set in the promoted
; value, the 32-bit count is never taken on zero and yields 8 when the low
; byte is zero, so no test-and-branch is expected in any configuration
; (`rep bsfl` without BMI, `tzcntl` with +bmi).
682 define i8 @cttz_i8_zero_test(i8 %n) {
683 ; X86-LABEL: cttz_i8_zero_test:
685 ; X86-NEXT: movl $256, %eax # imm = 0x100
686 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
687 ; X86-NEXT: rep bsfl %eax, %eax
688 ; X86-NEXT: # kill: def $al killed $al killed $eax
691 ; X64-LABEL: cttz_i8_zero_test:
693 ; X64-NEXT: orl $256, %edi # imm = 0x100
694 ; X64-NEXT: rep bsfl %edi, %eax
695 ; X64-NEXT: # kill: def $al killed $al killed $eax
698 ; X86-CLZ-LABEL: cttz_i8_zero_test:
700 ; X86-CLZ-NEXT: movl $256, %eax # imm = 0x100
701 ; X86-CLZ-NEXT: orl {{[0-9]+}}(%esp), %eax
702 ; X86-CLZ-NEXT: tzcntl %eax, %eax
703 ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
706 ; X64-CLZ-LABEL: cttz_i8_zero_test:
708 ; X64-CLZ-NEXT: orl $256, %edi # imm = 0x100
709 ; X64-CLZ-NEXT: tzcntl %edi, %eax
710 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
713 ; X64-FASTLZCNT-LABEL: cttz_i8_zero_test:
714 ; X64-FASTLZCNT: # %bb.0:
715 ; X64-FASTLZCNT-NEXT: orl $256, %edi # imm = 0x100
716 ; X64-FASTLZCNT-NEXT: tzcntl %edi, %eax
717 ; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
718 ; X64-FASTLZCNT-NEXT: retq
720 ; X86-FASTLZCNT-LABEL: cttz_i8_zero_test:
721 ; X86-FASTLZCNT: # %bb.0:
722 ; X86-FASTLZCNT-NEXT: movl $256, %eax # imm = 0x100
723 ; X86-FASTLZCNT-NEXT: orl {{[0-9]+}}(%esp), %eax
724 ; X86-FASTLZCNT-NEXT: tzcntl %eax, %eax
725 ; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
726 ; X86-FASTLZCNT-NEXT: retl
727 %tmp1 = call i8 @llvm.cttz.i8(i8 %n, i1 false)
731 ; Promote i16 cttz to i32 and set bit 16 (OR with 0x10000) to prevent the (slow) zero-src bsf case.
; The call passes `i1 false`. Because bit 16 is always set in the promoted
; value, the 32-bit count is never taken on zero and yields 16 when the low
; 16 bits are zero, so no test-and-branch is expected in any configuration
; (`rep bsfl` without BMI, `tzcntl` with +bmi).
732 define i16 @cttz_i16_zero_test(i16 %n) {
733 ; X86-LABEL: cttz_i16_zero_test:
735 ; X86-NEXT: movl $65536, %eax # imm = 0x10000
736 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
737 ; X86-NEXT: rep bsfl %eax, %eax
738 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
741 ; X64-LABEL: cttz_i16_zero_test:
743 ; X64-NEXT: orl $65536, %edi # imm = 0x10000
744 ; X64-NEXT: rep bsfl %edi, %eax
745 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
748 ; X86-CLZ-LABEL: cttz_i16_zero_test:
750 ; X86-CLZ-NEXT: movl $65536, %eax # imm = 0x10000
751 ; X86-CLZ-NEXT: orl {{[0-9]+}}(%esp), %eax
752 ; X86-CLZ-NEXT: tzcntl %eax, %eax
753 ; X86-CLZ-NEXT: # kill: def $ax killed $ax killed $eax
756 ; X64-CLZ-LABEL: cttz_i16_zero_test:
758 ; X64-CLZ-NEXT: orl $65536, %edi # imm = 0x10000
759 ; X64-CLZ-NEXT: tzcntl %edi, %eax
760 ; X64-CLZ-NEXT: # kill: def $ax killed $ax killed $eax
763 ; X64-FASTLZCNT-LABEL: cttz_i16_zero_test:
764 ; X64-FASTLZCNT: # %bb.0:
765 ; X64-FASTLZCNT-NEXT: orl $65536, %edi # imm = 0x10000
766 ; X64-FASTLZCNT-NEXT: tzcntl %edi, %eax
767 ; X64-FASTLZCNT-NEXT: # kill: def $ax killed $ax killed $eax
768 ; X64-FASTLZCNT-NEXT: retq
770 ; X86-FASTLZCNT-LABEL: cttz_i16_zero_test:
771 ; X86-FASTLZCNT: # %bb.0:
772 ; X86-FASTLZCNT-NEXT: movl $65536, %eax # imm = 0x10000
773 ; X86-FASTLZCNT-NEXT: orl {{[0-9]+}}(%esp), %eax
774 ; X86-FASTLZCNT-NEXT: tzcntl %eax, %eax
775 ; X86-FASTLZCNT-NEXT: # kill: def $ax killed $ax killed $eax
776 ; X86-FASTLZCNT-NEXT: retl
777 %tmp1 = call i16 @llvm.cttz.i16(i16 %n, i1 false)
781 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; The call passes `i1 false`, so a zero input must give a defined result.
; Without BMI the checks expect a `testl`/`je` guard ahead of `rep bsfl`, and
; `movl $32` as the zero-input result. With +bmi a single `tzcntl` is
; expected and no guard is needed.
782 define i32 @cttz_i32_zero_test(i32 %n) {
783 ; X86-LABEL: cttz_i32_zero_test:
785 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
786 ; X86-NEXT: testl %eax, %eax
787 ; X86-NEXT: je .LBB14_1
788 ; X86-NEXT: # %bb.2: # %cond.false
789 ; X86-NEXT: rep bsfl %eax, %eax
791 ; X86-NEXT: .LBB14_1:
792 ; X86-NEXT: movl $32, %eax
795 ; X64-LABEL: cttz_i32_zero_test:
797 ; X64-NEXT: testl %edi, %edi
798 ; X64-NEXT: je .LBB14_1
799 ; X64-NEXT: # %bb.2: # %cond.false
800 ; X64-NEXT: rep bsfl %edi, %eax
802 ; X64-NEXT: .LBB14_1:
803 ; X64-NEXT: movl $32, %eax
806 ; X86-CLZ-LABEL: cttz_i32_zero_test:
808 ; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
811 ; X64-CLZ-LABEL: cttz_i32_zero_test:
813 ; X64-CLZ-NEXT: tzcntl %edi, %eax
816 ; X64-FASTLZCNT-LABEL: cttz_i32_zero_test:
817 ; X64-FASTLZCNT: # %bb.0:
818 ; X64-FASTLZCNT-NEXT: tzcntl %edi, %eax
819 ; X64-FASTLZCNT-NEXT: retq
821 ; X86-FASTLZCNT-LABEL: cttz_i32_zero_test:
822 ; X86-FASTLZCNT: # %bb.0:
823 ; X86-FASTLZCNT-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
824 ; X86-FASTLZCNT-NEXT: retl
825 %tmp1 = call i32 @llvm.cttz.i32(i32 %n, i1 false)
829 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; The call passes `i1 false`, so a zero input must give a defined result.
; The test-and-branch shape described above is what x86-64 without BMI is
; expected to emit: `testq`/`je`, then `rep bsfq`, with `movl $64` on the
; zero path. With +bmi x86-64 uses a single `tzcntq`.
; i686 without BMI has no up-front guard on the whole value. A plain `bsfl`
; is applied to one half and, when that half is zero (ZF set by `bsfl`), the
; constant 32 is substituted; adding 32 gives 64 when the other half is zero
; as well. The no-cmov checks contain an explicit -NOT line asserting that no
; `rep` appears before that `bsfl`.
; NOTE(review): presumably the prefix must be absent because the following
; `je`/`cmov` consumes the ZF produced by bsf, and a `rep`-prefixed encoding
; executes as tzcnt (different flag behaviour) on BMI hardware -- confirm
; against the X86 backend.
; i686 with +bmi tests the first-loaded half and uses `tzcntl` on either that
; half or the other half plus 32.
830 define i64 @cttz_i64_zero_test(i64 %n) {
831 ; X86-NOCMOV-LABEL: cttz_i64_zero_test:
832 ; X86-NOCMOV: # %bb.0:
833 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
834 ; X86-NOCMOV-NOT: rep
835 ; X86-NOCMOV-NEXT: bsfl {{[0-9]+}}(%esp), %edx
836 ; X86-NOCMOV-NEXT: movl $32, %eax
837 ; X86-NOCMOV-NEXT: je .LBB15_2
838 ; X86-NOCMOV-NEXT: # %bb.1:
839 ; X86-NOCMOV-NEXT: movl %edx, %eax
840 ; X86-NOCMOV-NEXT: .LBB15_2:
841 ; X86-NOCMOV-NEXT: testl %ecx, %ecx
842 ; X86-NOCMOV-NEXT: jne .LBB15_3
843 ; X86-NOCMOV-NEXT: # %bb.4:
844 ; X86-NOCMOV-NEXT: addl $32, %eax
845 ; X86-NOCMOV-NEXT: xorl %edx, %edx
846 ; X86-NOCMOV-NEXT: retl
847 ; X86-NOCMOV-NEXT: .LBB15_3:
848 ; X86-NOCMOV-NEXT: rep bsfl %ecx, %eax
849 ; X86-NOCMOV-NEXT: xorl %edx, %edx
850 ; X86-NOCMOV-NEXT: retl
852 ; X86-CMOV-LABEL: cttz_i64_zero_test:
854 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
856 ; X86-CMOV-NEXT: bsfl {{[0-9]+}}(%esp), %ecx
857 ; X86-CMOV-NEXT: movl $32, %edx
858 ; X86-CMOV-NEXT: cmovnel %ecx, %edx
859 ; X86-CMOV-NEXT: addl $32, %edx
861 ; X86-CMOV-NEXT: bsfl %eax, %eax
862 ; X86-CMOV-NEXT: cmovel %edx, %eax
863 ; X86-CMOV-NEXT: xorl %edx, %edx
864 ; X86-CMOV-NEXT: retl
866 ; X64-LABEL: cttz_i64_zero_test:
868 ; X64-NEXT: testq %rdi, %rdi
869 ; X64-NEXT: je .LBB15_1
870 ; X64-NEXT: # %bb.2: # %cond.false
871 ; X64-NEXT: rep bsfq %rdi, %rax
873 ; X64-NEXT: .LBB15_1:
874 ; X64-NEXT: movl $64, %eax
877 ; X86-CLZ-LABEL: cttz_i64_zero_test:
879 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
880 ; X86-CLZ-NEXT: testl %eax, %eax
881 ; X86-CLZ-NEXT: jne .LBB15_1
882 ; X86-CLZ-NEXT: # %bb.2:
883 ; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
884 ; X86-CLZ-NEXT: addl $32, %eax
885 ; X86-CLZ-NEXT: xorl %edx, %edx
887 ; X86-CLZ-NEXT: .LBB15_1:
888 ; X86-CLZ-NEXT: tzcntl %eax, %eax
889 ; X86-CLZ-NEXT: xorl %edx, %edx
892 ; X64-CLZ-LABEL: cttz_i64_zero_test:
894 ; X64-CLZ-NEXT: tzcntq %rdi, %rax
897 ; X64-FASTLZCNT-LABEL: cttz_i64_zero_test:
898 ; X64-FASTLZCNT: # %bb.0:
899 ; X64-FASTLZCNT-NEXT: tzcntq %rdi, %rax
900 ; X64-FASTLZCNT-NEXT: retq
902 ; X86-FASTLZCNT-LABEL: cttz_i64_zero_test:
903 ; X86-FASTLZCNT: # %bb.0:
904 ; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
905 ; X86-FASTLZCNT-NEXT: testl %eax, %eax
906 ; X86-FASTLZCNT-NEXT: jne .LBB15_1
907 ; X86-FASTLZCNT-NEXT: # %bb.2:
908 ; X86-FASTLZCNT-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
909 ; X86-FASTLZCNT-NEXT: addl $32, %eax
910 ; X86-FASTLZCNT-NEXT: xorl %edx, %edx
911 ; X86-FASTLZCNT-NEXT: retl
912 ; X86-FASTLZCNT-NEXT: .LBB15_1:
913 ; X86-FASTLZCNT-NEXT: tzcntl %eax, %eax
914 ; X86-FASTLZCNT-NEXT: xorl %edx, %edx
915 ; X86-FASTLZCNT-NEXT: retl
916 %tmp1 = call i64 @llvm.cttz.i64(i64 %n, i1 false)
920 ; Don't generate the cmovne when the source is known non-zero (and bsr would
; NOTE(review): the sentence above is cut off in this view; it presumably
; continues "not set ZF)" -- bsr sets ZF only for a zero source. Confirm
; against the full file.
; The call passes `i1 false` on %or. The definition of %or is not visible
; here; the expected `orl $1` suggests it is %n with bit 0 forced on, which
; would make the ctlz operand provably non-zero.
; Expected code: `orl $1`, then `bsrl` + `xorl $31` without LZCNT or
; `lzcntl` with +lzcnt, with no zero test, branch or cmov in any configuration.
923 define i32 @ctlz_i32_fold_cmov(i32 %n) {
924 ; X86-LABEL: ctlz_i32_fold_cmov:
926 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
927 ; X86-NEXT: orl $1, %eax
928 ; X86-NEXT: bsrl %eax, %eax
929 ; X86-NEXT: xorl $31, %eax
932 ; X64-LABEL: ctlz_i32_fold_cmov:
934 ; X64-NEXT: orl $1, %edi
935 ; X64-NEXT: bsrl %edi, %eax
936 ; X64-NEXT: xorl $31, %eax
939 ; X86-CLZ-LABEL: ctlz_i32_fold_cmov:
941 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
942 ; X86-CLZ-NEXT: orl $1, %eax
943 ; X86-CLZ-NEXT: lzcntl %eax, %eax
946 ; X64-CLZ-LABEL: ctlz_i32_fold_cmov:
948 ; X64-CLZ-NEXT: orl $1, %edi
949 ; X64-CLZ-NEXT: lzcntl %edi, %eax
952 ; X64-FASTLZCNT-LABEL: ctlz_i32_fold_cmov:
953 ; X64-FASTLZCNT: # %bb.0:
954 ; X64-FASTLZCNT-NEXT: orl $1, %edi
955 ; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax
956 ; X64-FASTLZCNT-NEXT: retq
958 ; X86-FASTLZCNT-LABEL: ctlz_i32_fold_cmov:
959 ; X86-FASTLZCNT: # %bb.0:
960 ; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
961 ; X86-FASTLZCNT-NEXT: orl $1, %eax
962 ; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
963 ; X86-FASTLZCNT-NEXT: retl
965 %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
969 ; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
970 ; the most significant bit, which is what 'bsr' does natively.
971 ; NOTE: We intentionally don't select `bsr` when `fast-lzcnt` is
972 ; available. This is 1) because `bsr` has some drawbacks including a
973 ; dependency on dst, 2) very poor performance on some of the
974 ; `fast-lzcnt` processors, and 3) `lzcnt` runs at ALU latency/throughput
975 ; so `lzcnt` + `xor` has better throughput than even the 1-uop
976 ; (1c latency, 1c throughput) `bsr`.
; The IR is ctlz with `i1 true` followed by `xor ..., 31`.
; Expected code: a bare `bsrl` in every configuration without +fast-lzcnt
; (including the +bmi,+lzcnt ones), and `lzcntl` + `xorl $31` with
; +fast-lzcnt.
977 define i32 @ctlz_bsr(i32 %n) {
978 ; X86-LABEL: ctlz_bsr:
980 ; X86-NEXT: bsrl {{[0-9]+}}(%esp), %eax
983 ; X64-LABEL: ctlz_bsr:
985 ; X64-NEXT: bsrl %edi, %eax
988 ; X86-CLZ-LABEL: ctlz_bsr:
990 ; X86-CLZ-NEXT: bsrl {{[0-9]+}}(%esp), %eax
993 ; X64-CLZ-LABEL: ctlz_bsr:
995 ; X64-CLZ-NEXT: bsrl %edi, %eax
998 ; X64-FASTLZCNT-LABEL: ctlz_bsr:
999 ; X64-FASTLZCNT: # %bb.0:
1000 ; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax
1001 ; X64-FASTLZCNT-NEXT: xorl $31, %eax
1002 ; X64-FASTLZCNT-NEXT: retq
1004 ; X86-FASTLZCNT-LABEL: ctlz_bsr:
1005 ; X86-FASTLZCNT: # %bb.0:
1006 ; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
1007 ; X86-FASTLZCNT-NEXT: xorl $31, %eax
1008 ; X86-FASTLZCNT-NEXT: retl
1009 %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
1010 %bsr = xor i32 %ctlz, 31
1014 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
1015 ; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
1016 ; codegen doesn't know how to combine the $32 and $31 into $63.
; The IR is ctlz with `i1 false` followed by `xor ..., 31`.
; Without LZCNT the checks currently record the missed folds: two consecutive
; `xorl $31` after `bsrl` on the non-zero path, and `movl $32` followed by
; `xorl $31` on the zero path. With +lzcnt (with or without fast-lzcnt) the
; expected code is `lzcntl` + `xorl $31` and no branch.
1017 define i32 @ctlz_bsr_zero_test(i32 %n) {
1018 ; X86-LABEL: ctlz_bsr_zero_test:
1020 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1021 ; X86-NEXT: testl %eax, %eax
1022 ; X86-NEXT: je .LBB18_1
1023 ; X86-NEXT: # %bb.2: # %cond.false
1024 ; X86-NEXT: bsrl %eax, %eax
1025 ; X86-NEXT: xorl $31, %eax
1026 ; X86-NEXT: xorl $31, %eax
1028 ; X86-NEXT: .LBB18_1:
1029 ; X86-NEXT: movl $32, %eax
1030 ; X86-NEXT: xorl $31, %eax
1033 ; X64-LABEL: ctlz_bsr_zero_test:
1035 ; X64-NEXT: testl %edi, %edi
1036 ; X64-NEXT: je .LBB18_1
1037 ; X64-NEXT: # %bb.2: # %cond.false
1038 ; X64-NEXT: bsrl %edi, %eax
1039 ; X64-NEXT: xorl $31, %eax
1040 ; X64-NEXT: xorl $31, %eax
1042 ; X64-NEXT: .LBB18_1:
1043 ; X64-NEXT: movl $32, %eax
1044 ; X64-NEXT: xorl $31, %eax
1047 ; X86-CLZ-LABEL: ctlz_bsr_zero_test:
1049 ; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
1050 ; X86-CLZ-NEXT: xorl $31, %eax
1051 ; X86-CLZ-NEXT: retl
1053 ; X64-CLZ-LABEL: ctlz_bsr_zero_test:
1055 ; X64-CLZ-NEXT: lzcntl %edi, %eax
1056 ; X64-CLZ-NEXT: xorl $31, %eax
1057 ; X64-CLZ-NEXT: retq
1059 ; X64-FASTLZCNT-LABEL: ctlz_bsr_zero_test:
1060 ; X64-FASTLZCNT: # %bb.0:
1061 ; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax
1062 ; X64-FASTLZCNT-NEXT: xorl $31, %eax
1063 ; X64-FASTLZCNT-NEXT: retq
1065 ; X86-FASTLZCNT-LABEL: ctlz_bsr_zero_test:
1066 ; X86-FASTLZCNT: # %bb.0:
1067 ; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
1068 ; X86-FASTLZCNT-NEXT: xorl $31, %eax
1069 ; X86-FASTLZCNT-NEXT: retl
1070 %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
1071 %bsr = xor i32 %ctlz, 31
; Known-bits test for cttz on i8: the IR masks the cttz result with
; `and i8 %tmp, 1`, yet the expected assembly contains no `and` instruction.
; NOTE(review): %x2 is defined outside the visible lines; the expected
; `orb $2` suggests it is %x with bit 1 forced on, which bounds the count to
; 0 or 1 and makes the mask redundant -- confirm against the full file.
; Expected code: `orb $2`, `movzbl`, then `rep bsfl` without BMI or `tzcntl`
; with +bmi.
1075 define i8 @cttz_i8_knownbits(i8 %x) {
1076 ; X86-LABEL: cttz_i8_knownbits:
1078 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1079 ; X86-NEXT: orb $2, %al
1080 ; X86-NEXT: movzbl %al, %eax
1081 ; X86-NEXT: rep bsfl %eax, %eax
1082 ; X86-NEXT: # kill: def $al killed $al killed $eax
1085 ; X64-LABEL: cttz_i8_knownbits:
1087 ; X64-NEXT: orb $2, %dil
1088 ; X64-NEXT: movzbl %dil, %eax
1089 ; X64-NEXT: rep bsfl %eax, %eax
1090 ; X64-NEXT: # kill: def $al killed $al killed $eax
1093 ; X86-CLZ-LABEL: cttz_i8_knownbits:
1095 ; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1096 ; X86-CLZ-NEXT: orb $2, %al
1097 ; X86-CLZ-NEXT: movzbl %al, %eax
1098 ; X86-CLZ-NEXT: tzcntl %eax, %eax
1099 ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
1100 ; X86-CLZ-NEXT: retl
1102 ; X64-CLZ-LABEL: cttz_i8_knownbits:
1104 ; X64-CLZ-NEXT: orb $2, %dil
1105 ; X64-CLZ-NEXT: movzbl %dil, %eax
1106 ; X64-CLZ-NEXT: tzcntl %eax, %eax
1107 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
1108 ; X64-CLZ-NEXT: retq
1110 ; X64-FASTLZCNT-LABEL: cttz_i8_knownbits:
1111 ; X64-FASTLZCNT: # %bb.0:
1112 ; X64-FASTLZCNT-NEXT: orb $2, %dil
1113 ; X64-FASTLZCNT-NEXT: movzbl %dil, %eax
1114 ; X64-FASTLZCNT-NEXT: tzcntl %eax, %eax
1115 ; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
1116 ; X64-FASTLZCNT-NEXT: retq
1118 ; X86-FASTLZCNT-LABEL: cttz_i8_knownbits:
1119 ; X86-FASTLZCNT: # %bb.0:
1120 ; X86-FASTLZCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1121 ; X86-FASTLZCNT-NEXT: orb $2, %al
1122 ; X86-FASTLZCNT-NEXT: movzbl %al, %eax
1123 ; X86-FASTLZCNT-NEXT: tzcntl %eax, %eax
1124 ; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
1125 ; X86-FASTLZCNT-NEXT: retl
1127 %tmp = call i8 @llvm.cttz.i8(i8 %x2, i1 true )
1128 %tmp2 = and i8 %tmp, 1
; ctlz_i8_knownbits: i8 count-leading-zeros called with i1 true (a zero input
; yields poison per the LangRef); the result is then masked with `and ..., 1`.
; Every check block below ORs the input with 64 before the scan. Runs without
; LZCNT expect bsrl followed by `xorl $7`; runs with LZCNT expect lzcntl
; followed by `addl $-24`. No `and` instruction appears in the visible
; expected assembly.
; NOTE(review): %x2 is presumably `or i8 %x, 64` (matches the `orb $64`
; checks) - confirm.
1132 define i8 @ctlz_i8_knownbits(i8 %x) {
1133 ; X86-LABEL: ctlz_i8_knownbits:
1135 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1136 ; X86-NEXT: orb $64, %al
1137 ; X86-NEXT: movzbl %al, %eax
1138 ; X86-NEXT: bsrl %eax, %eax
1139 ; X86-NEXT: xorl $7, %eax
1140 ; X86-NEXT: # kill: def $al killed $al killed $eax
1143 ; X64-LABEL: ctlz_i8_knownbits:
1145 ; X64-NEXT: orb $64, %dil
1146 ; X64-NEXT: movzbl %dil, %eax
1147 ; X64-NEXT: bsrl %eax, %eax
1148 ; X64-NEXT: xorl $7, %eax
1149 ; X64-NEXT: # kill: def $al killed $al killed $eax
1152 ; X86-CLZ-LABEL: ctlz_i8_knownbits:
1154 ; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1155 ; X86-CLZ-NEXT: orb $64, %al
1156 ; X86-CLZ-NEXT: movzbl %al, %eax
1157 ; X86-CLZ-NEXT: lzcntl %eax, %eax
1158 ; X86-CLZ-NEXT: addl $-24, %eax
1159 ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
1160 ; X86-CLZ-NEXT: retl
1162 ; X64-CLZ-LABEL: ctlz_i8_knownbits:
1164 ; X64-CLZ-NEXT: orb $64, %dil
1165 ; X64-CLZ-NEXT: movzbl %dil, %eax
1166 ; X64-CLZ-NEXT: lzcntl %eax, %eax
1167 ; X64-CLZ-NEXT: addl $-24, %eax
1168 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
1169 ; X64-CLZ-NEXT: retq
1171 ; X64-FASTLZCNT-LABEL: ctlz_i8_knownbits:
1172 ; X64-FASTLZCNT: # %bb.0:
1173 ; X64-FASTLZCNT-NEXT: orb $64, %dil
1174 ; X64-FASTLZCNT-NEXT: movzbl %dil, %eax
1175 ; X64-FASTLZCNT-NEXT: lzcntl %eax, %eax
1176 ; X64-FASTLZCNT-NEXT: addl $-24, %eax
1177 ; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
1178 ; X64-FASTLZCNT-NEXT: retq
1180 ; X86-FASTLZCNT-LABEL: ctlz_i8_knownbits:
1181 ; X86-FASTLZCNT: # %bb.0:
1182 ; X86-FASTLZCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1183 ; X86-FASTLZCNT-NEXT: orb $64, %al
1184 ; X86-FASTLZCNT-NEXT: movzbl %al, %eax
1185 ; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
1186 ; X86-FASTLZCNT-NEXT: addl $-24, %eax
1187 ; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
1188 ; X86-FASTLZCNT-NEXT: retl
1191 %tmp = call i8 @llvm.ctlz.i8(i8 %x2, i1 true )
1192 %tmp2 = and i8 %tmp, 1
1196 ; Make sure we can detect that the input is non-zero and avoid cmov after BSR
1197 ; This is relevant for 32-bit mode without lzcnt
; The checks show the i64 input OR'ed with 1 before the scan (`orl $1` or
; `orq $1`), while ctlz itself is called with i1 false. The 64-bit runs scan
; the whole value with a single bsrq or lzcntq. The 32-bit runs scan the two
; 32-bit halves separately and pick one result with a `testl` (a branch, or
; cmovnel in the +cmov run).
; NOTE(review): %o is presumably `or i64 %n, 1` - confirm.
1198 define i64 @ctlz_i64_zero_test_knownneverzero(i64 %n) {
1199 ; X86-NOCMOV-LABEL: ctlz_i64_zero_test_knownneverzero:
1200 ; X86-NOCMOV: # %bb.0:
1201 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
1202 ; X86-NOCMOV-NEXT: testl %eax, %eax
1203 ; X86-NOCMOV-NEXT: jne .LBB21_1
1204 ; X86-NOCMOV-NEXT: # %bb.2:
1205 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
1206 ; X86-NOCMOV-NEXT: orl $1, %eax
1207 ; X86-NOCMOV-NEXT: bsrl %eax, %eax
1208 ; X86-NOCMOV-NEXT: xorl $31, %eax
1209 ; X86-NOCMOV-NEXT: orl $32, %eax
1210 ; X86-NOCMOV-NEXT: xorl %edx, %edx
1211 ; X86-NOCMOV-NEXT: retl
1212 ; X86-NOCMOV-NEXT: .LBB21_1:
1213 ; X86-NOCMOV-NEXT: bsrl %eax, %eax
1214 ; X86-NOCMOV-NEXT: xorl $31, %eax
1215 ; X86-NOCMOV-NEXT: xorl %edx, %edx
1216 ; X86-NOCMOV-NEXT: retl
1218 ; X86-CMOV-LABEL: ctlz_i64_zero_test_knownneverzero:
1219 ; X86-CMOV: # %bb.0:
1220 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
1221 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
1222 ; X86-CMOV-NEXT: orl $1, %eax
1223 ; X86-CMOV-NEXT: bsrl %ecx, %edx
1224 ; X86-CMOV-NEXT: xorl $31, %edx
1225 ; X86-CMOV-NEXT: bsrl %eax, %eax
1226 ; X86-CMOV-NEXT: xorl $31, %eax
1227 ; X86-CMOV-NEXT: orl $32, %eax
1228 ; X86-CMOV-NEXT: testl %ecx, %ecx
1229 ; X86-CMOV-NEXT: cmovnel %edx, %eax
1230 ; X86-CMOV-NEXT: xorl %edx, %edx
1231 ; X86-CMOV-NEXT: retl
1233 ; X64-LABEL: ctlz_i64_zero_test_knownneverzero:
1235 ; X64-NEXT: orq $1, %rdi
1236 ; X64-NEXT: bsrq %rdi, %rax
1237 ; X64-NEXT: xorq $63, %rax
1240 ; X86-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
1242 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
1243 ; X86-CLZ-NEXT: testl %eax, %eax
1244 ; X86-CLZ-NEXT: jne .LBB21_1
1245 ; X86-CLZ-NEXT: # %bb.2:
1246 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
1247 ; X86-CLZ-NEXT: orl $1, %eax
1248 ; X86-CLZ-NEXT: lzcntl %eax, %eax
1249 ; X86-CLZ-NEXT: orl $32, %eax
1250 ; X86-CLZ-NEXT: xorl %edx, %edx
1251 ; X86-CLZ-NEXT: retl
1252 ; X86-CLZ-NEXT: .LBB21_1:
1253 ; X86-CLZ-NEXT: lzcntl %eax, %eax
1254 ; X86-CLZ-NEXT: xorl %edx, %edx
1255 ; X86-CLZ-NEXT: retl
1257 ; X64-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
1259 ; X64-CLZ-NEXT: orq $1, %rdi
1260 ; X64-CLZ-NEXT: lzcntq %rdi, %rax
1261 ; X64-CLZ-NEXT: retq
1263 ; X64-FASTLZCNT-LABEL: ctlz_i64_zero_test_knownneverzero:
1264 ; X64-FASTLZCNT: # %bb.0:
1265 ; X64-FASTLZCNT-NEXT: orq $1, %rdi
1266 ; X64-FASTLZCNT-NEXT: lzcntq %rdi, %rax
1267 ; X64-FASTLZCNT-NEXT: retq
1269 ; X86-FASTLZCNT-LABEL: ctlz_i64_zero_test_knownneverzero:
1270 ; X86-FASTLZCNT: # %bb.0:
1271 ; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
1272 ; X86-FASTLZCNT-NEXT: testl %eax, %eax
1273 ; X86-FASTLZCNT-NEXT: jne .LBB21_1
1274 ; X86-FASTLZCNT-NEXT: # %bb.2:
1275 ; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
1276 ; X86-FASTLZCNT-NEXT: orl $1, %eax
1277 ; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
1278 ; X86-FASTLZCNT-NEXT: orl $32, %eax
1279 ; X86-FASTLZCNT-NEXT: xorl %edx, %edx
1280 ; X86-FASTLZCNT-NEXT: retl
1281 ; X86-FASTLZCNT-NEXT: .LBB21_1:
1282 ; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
1283 ; X86-FASTLZCNT-NEXT: xorl %edx, %edx
1284 ; X86-FASTLZCNT-NEXT: retl
1286 %tmp1 = call i64 @llvm.ctlz.i64(i64 %o, i1 false)
1290 ; Make sure we can detect that the input is non-zero and avoid cmov after BSF
1291 ; This is relevant for 32-bit mode without tzcnt
; The input is OR'ed with 0x8000000000000000 (top bit set), so %o can never be
; zero even though cttz is called with i1 false. The 64-bit runs do a single
; `rep bsfq` or tzcntq on the OR'ed value. The 32-bit runs scan the two 32-bit
; halves separately and pick one result with a `testl` (a branch, or cmovnel
; in the +cmov run).
1292 define i64 @cttz_i64_zero_test_knownneverzero(i64 %n) {
1293 ; X86-NOCMOV-LABEL: cttz_i64_zero_test_knownneverzero:
1294 ; X86-NOCMOV: # %bb.0:
1295 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
1296 ; X86-NOCMOV-NEXT: testl %eax, %eax
1297 ; X86-NOCMOV-NEXT: jne .LBB22_1
1298 ; X86-NOCMOV-NEXT: # %bb.2:
1299 ; X86-NOCMOV-NEXT: movl $-2147483648, %eax # imm = 0x80000000
1300 ; X86-NOCMOV-NEXT: orl {{[0-9]+}}(%esp), %eax
1301 ; X86-NOCMOV-NEXT: rep bsfl %eax, %eax
1302 ; X86-NOCMOV-NEXT: orl $32, %eax
1303 ; X86-NOCMOV-NEXT: xorl %edx, %edx
1304 ; X86-NOCMOV-NEXT: retl
1305 ; X86-NOCMOV-NEXT: .LBB22_1:
1306 ; X86-NOCMOV-NEXT: rep bsfl %eax, %eax
1307 ; X86-NOCMOV-NEXT: xorl %edx, %edx
1308 ; X86-NOCMOV-NEXT: retl
1310 ; X86-CMOV-LABEL: cttz_i64_zero_test_knownneverzero:
1311 ; X86-CMOV: # %bb.0:
1312 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
1313 ; X86-CMOV-NEXT: movl $-2147483648, %eax # imm = 0x80000000
1314 ; X86-CMOV-NEXT: orl {{[0-9]+}}(%esp), %eax
1315 ; X86-CMOV-NEXT: rep bsfl %ecx, %edx
1316 ; X86-CMOV-NEXT: rep bsfl %eax, %eax
1317 ; X86-CMOV-NEXT: orl $32, %eax
1318 ; X86-CMOV-NEXT: testl %ecx, %ecx
1319 ; X86-CMOV-NEXT: cmovnel %edx, %eax
1320 ; X86-CMOV-NEXT: xorl %edx, %edx
1321 ; X86-CMOV-NEXT: retl
1323 ; X64-LABEL: cttz_i64_zero_test_knownneverzero:
1325 ; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
1326 ; X64-NEXT: orq %rdi, %rax
1327 ; X64-NEXT: rep bsfq %rax, %rax
1330 ; X86-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
1332 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
1333 ; X86-CLZ-NEXT: testl %eax, %eax
1334 ; X86-CLZ-NEXT: jne .LBB22_1
1335 ; X86-CLZ-NEXT: # %bb.2:
1336 ; X86-CLZ-NEXT: movl $-2147483648, %eax # imm = 0x80000000
1337 ; X86-CLZ-NEXT: orl {{[0-9]+}}(%esp), %eax
1338 ; X86-CLZ-NEXT: tzcntl %eax, %eax
1339 ; X86-CLZ-NEXT: orl $32, %eax
1340 ; X86-CLZ-NEXT: xorl %edx, %edx
1341 ; X86-CLZ-NEXT: retl
1342 ; X86-CLZ-NEXT: .LBB22_1:
1343 ; X86-CLZ-NEXT: tzcntl %eax, %eax
1344 ; X86-CLZ-NEXT: xorl %edx, %edx
1345 ; X86-CLZ-NEXT: retl
1347 ; X64-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
1349 ; X64-CLZ-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
1350 ; X64-CLZ-NEXT: orq %rdi, %rax
1351 ; X64-CLZ-NEXT: tzcntq %rax, %rax
1352 ; X64-CLZ-NEXT: retq
1354 ; X64-FASTLZCNT-LABEL: cttz_i64_zero_test_knownneverzero:
1355 ; X64-FASTLZCNT: # %bb.0:
1356 ; X64-FASTLZCNT-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
1357 ; X64-FASTLZCNT-NEXT: orq %rdi, %rax
1358 ; X64-FASTLZCNT-NEXT: tzcntq %rax, %rax
1359 ; X64-FASTLZCNT-NEXT: retq
1361 ; X86-FASTLZCNT-LABEL: cttz_i64_zero_test_knownneverzero:
1362 ; X86-FASTLZCNT: # %bb.0:
1363 ; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
1364 ; X86-FASTLZCNT-NEXT: testl %eax, %eax
1365 ; X86-FASTLZCNT-NEXT: jne .LBB22_1
1366 ; X86-FASTLZCNT-NEXT: # %bb.2:
1367 ; X86-FASTLZCNT-NEXT: movl $-2147483648, %eax # imm = 0x80000000
1368 ; X86-FASTLZCNT-NEXT: orl {{[0-9]+}}(%esp), %eax
1369 ; X86-FASTLZCNT-NEXT: tzcntl %eax, %eax
1370 ; X86-FASTLZCNT-NEXT: orl $32, %eax
1371 ; X86-FASTLZCNT-NEXT: xorl %edx, %edx
1372 ; X86-FASTLZCNT-NEXT: retl
1373 ; X86-FASTLZCNT-NEXT: .LBB22_1:
1374 ; X86-FASTLZCNT-NEXT: tzcntl %eax, %eax
1375 ; X86-FASTLZCNT-NEXT: xorl %edx, %edx
1376 ; X86-FASTLZCNT-NEXT: retl
1377 %o = or i64 %n, -9223372036854775808 ; 0x8000000000000000
1378 %tmp1 = call i64 @llvm.cttz.i64(i64 %o, i1 false)
1382 ; Ensure we fold away the XOR(TRUNC(XOR(BSR(X),31)),31).
; The visible IR calls ctlz.i32 with i1 true (a zero input yields poison per
; the LangRef) and truncates %3 to i8.
; Expected code: a bsrl with no xor in the visible checks for the runs
; without LZCNT; lzcntl followed by `xorb $31` for the runs with LZCNT.
; NOTE(review): %3 is presumably `xor i32 %2, 31`, going by the pattern named
; in the first comment line - confirm.
1383 define i8 @PR47603_trunc(i32 %0) {
1384 ; X86-LABEL: PR47603_trunc:
1386 ; X86-NEXT: bsrl {{[0-9]+}}(%esp), %eax
1387 ; X86-NEXT: # kill: def $al killed $al killed $eax
1390 ; X64-LABEL: PR47603_trunc:
1392 ; X64-NEXT: bsrl %edi, %eax
1393 ; X64-NEXT: # kill: def $al killed $al killed $eax
1396 ; X86-CLZ-LABEL: PR47603_trunc:
1398 ; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
1399 ; X86-CLZ-NEXT: xorb $31, %al
1400 ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
1401 ; X86-CLZ-NEXT: retl
1403 ; X64-CLZ-LABEL: PR47603_trunc:
1405 ; X64-CLZ-NEXT: lzcntl %edi, %eax
1406 ; X64-CLZ-NEXT: xorb $31, %al
1407 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
1408 ; X64-CLZ-NEXT: retq
1410 ; X64-FASTLZCNT-LABEL: PR47603_trunc:
1411 ; X64-FASTLZCNT: # %bb.0:
1412 ; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax
1413 ; X64-FASTLZCNT-NEXT: xorb $31, %al
1414 ; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
1415 ; X64-FASTLZCNT-NEXT: retq
1417 ; X86-FASTLZCNT-LABEL: PR47603_trunc:
1418 ; X86-FASTLZCNT: # %bb.0:
1419 ; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
1420 ; X86-FASTLZCNT-NEXT: xorb $31, %al
1421 ; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
1422 ; X86-FASTLZCNT-NEXT: retl
1423 %2 = call i32 @llvm.ctlz.i32(i32 %0, i1 true)
1425 %4 = trunc i32 %3 to i8
1429 ; Ensure we fold away the XOR(ZEXT(XOR(BSR(X),31)),31).
; ctlz.i32 (i1 true) is XOR'ed with 31, zero-extended to i64 and used as the
; index into the [32 x i8] array at %a1; the loaded byte is sign-extended to
; i32.
; Expected code: bsrl feeding the movsbl index directly for the runs without
; LZCNT and for the i686 +bmi,+lzcnt run; lzcntl plus an explicit xor with 31
; for the remaining LZCNT runs.
1430 define i32 @PR47603_zext(i32 %a0, ptr %a1) {
1431 ; X86-LABEL: PR47603_zext:
1433 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1434 ; X86-NEXT: bsrl {{[0-9]+}}(%esp), %ecx
1435 ; X86-NEXT: movsbl (%eax,%ecx), %eax
1438 ; X64-LABEL: PR47603_zext:
1440 ; X64-NEXT: bsrl %edi, %eax
1441 ; X64-NEXT: movsbl (%rsi,%rax), %eax
1444 ; X86-CLZ-LABEL: PR47603_zext:
1446 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
1447 ; X86-CLZ-NEXT: bsrl {{[0-9]+}}(%esp), %ecx
1448 ; X86-CLZ-NEXT: movsbl (%eax,%ecx), %eax
1449 ; X86-CLZ-NEXT: retl
1451 ; X64-CLZ-LABEL: PR47603_zext:
1453 ; X64-CLZ-NEXT: lzcntl %edi, %eax
1454 ; X64-CLZ-NEXT: xorq $31, %rax
1455 ; X64-CLZ-NEXT: movsbl (%rsi,%rax), %eax
1456 ; X64-CLZ-NEXT: retq
1458 ; X64-FASTLZCNT-LABEL: PR47603_zext:
1459 ; X64-FASTLZCNT: # %bb.0:
1460 ; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax
1461 ; X64-FASTLZCNT-NEXT: xorq $31, %rax
1462 ; X64-FASTLZCNT-NEXT: movsbl (%rsi,%rax), %eax
1463 ; X64-FASTLZCNT-NEXT: retq
1465 ; X86-FASTLZCNT-LABEL: PR47603_zext:
1466 ; X86-FASTLZCNT: # %bb.0:
1467 ; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
1468 ; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %ecx
1469 ; X86-FASTLZCNT-NEXT: xorl $31, %ecx
1470 ; X86-FASTLZCNT-NEXT: movsbl (%eax,%ecx), %eax
1471 ; X86-FASTLZCNT-NEXT: retl
1472 %ctlz = tail call i32 @llvm.ctlz.i32(i32 %a0, i1 true)
1473 %xor = xor i32 %ctlz, 31
1474 %zext = zext i32 %xor to i64
1475 %gep = getelementptr inbounds [32 x i8], ptr %a1, i64 0, i64 %zext
1476 %load = load i8, ptr %gep, align 1
1477 %sext = sext i8 %load to i32
; cttz_i32_osize: cttz.i32 with i1 true (a zero input yields poison per the
; LangRef) inside a function marked `optsize`.
; Expected code: `bsfl` without a `rep` prefix for the runs without
; +bmi,+lzcnt; tzcntl for the runs with those features.
1481 define i32 @cttz_i32_osize(i32 %x) optsize {
1482 ; X86-LABEL: cttz_i32_osize:
1485 ; X86-NEXT: bsfl {{[0-9]+}}(%esp), %eax
1488 ; X64-LABEL: cttz_i32_osize:
1491 ; X64-NEXT: bsfl %edi, %eax
1494 ; X86-CLZ-LABEL: cttz_i32_osize:
1496 ; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
1497 ; X86-CLZ-NEXT: retl
1499 ; X64-CLZ-LABEL: cttz_i32_osize:
1501 ; X64-CLZ-NEXT: tzcntl %edi, %eax
1502 ; X64-CLZ-NEXT: retq
1504 ; X64-FASTLZCNT-LABEL: cttz_i32_osize:
1505 ; X64-FASTLZCNT: # %bb.0:
1506 ; X64-FASTLZCNT-NEXT: tzcntl %edi, %eax
1507 ; X64-FASTLZCNT-NEXT: retq
1509 ; X86-FASTLZCNT-LABEL: cttz_i32_osize:
1510 ; X86-FASTLZCNT: # %bb.0:
1511 ; X86-FASTLZCNT-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
1512 ; X86-FASTLZCNT-NEXT: retl
1513 %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true)
; cttz_i32_msize: cttz.i32 with i1 true (a zero input yields poison per the
; LangRef) inside a function marked `minsize`.
; Expected code: `bsfl` without a `rep` prefix for the runs without
; +bmi,+lzcnt; tzcntl for the runs with those features.
1517 define i32 @cttz_i32_msize(i32 %x) minsize {
1518 ; X86-LABEL: cttz_i32_msize:
1521 ; X86-NEXT: bsfl {{[0-9]+}}(%esp), %eax
1524 ; X64-LABEL: cttz_i32_msize:
1527 ; X64-NEXT: bsfl %edi, %eax
1530 ; X86-CLZ-LABEL: cttz_i32_msize:
1532 ; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
1533 ; X86-CLZ-NEXT: retl
1535 ; X64-CLZ-LABEL: cttz_i32_msize:
1537 ; X64-CLZ-NEXT: tzcntl %edi, %eax
1538 ; X64-CLZ-NEXT: retq
1540 ; X64-FASTLZCNT-LABEL: cttz_i32_msize:
1541 ; X64-FASTLZCNT: # %bb.0:
1542 ; X64-FASTLZCNT-NEXT: tzcntl %edi, %eax
1543 ; X64-FASTLZCNT-NEXT: retq
1545 ; X86-FASTLZCNT-LABEL: cttz_i32_msize:
1546 ; X86-FASTLZCNT: # %bb.0:
1547 ; X86-FASTLZCNT-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
1548 ; X86-FASTLZCNT-NEXT: retl
1549 %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true)
; ctlz_xor7_i8_true: ctlz.i8 with i1 true (a zero input yields poison per the
; LangRef), XOR'ed with 7.
; Expected code: zero-extend then bsrl, with no xor in the visible checks, for
; every run except the +fast-lzcnt ones; those use lzcntl, `addl $-24` and
; `xorb $7`.
1553 define i8 @ctlz_xor7_i8_true(i8 %x) {
1554 ; X86-LABEL: ctlz_xor7_i8_true:
1556 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1557 ; X86-NEXT: bsrl %eax, %eax
1558 ; X86-NEXT: # kill: def $al killed $al killed $eax
1561 ; X64-LABEL: ctlz_xor7_i8_true:
1563 ; X64-NEXT: movzbl %dil, %eax
1564 ; X64-NEXT: bsrl %eax, %eax
1565 ; X64-NEXT: # kill: def $al killed $al killed $eax
1568 ; X86-CLZ-LABEL: ctlz_xor7_i8_true:
1570 ; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1571 ; X86-CLZ-NEXT: bsrl %eax, %eax
1572 ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
1573 ; X86-CLZ-NEXT: retl
1575 ; X64-CLZ-LABEL: ctlz_xor7_i8_true:
1577 ; X64-CLZ-NEXT: movzbl %dil, %eax
1578 ; X64-CLZ-NEXT: bsrl %eax, %eax
1579 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
1580 ; X64-CLZ-NEXT: retq
1582 ; X64-FASTLZCNT-LABEL: ctlz_xor7_i8_true:
1583 ; X64-FASTLZCNT: # %bb.0:
1584 ; X64-FASTLZCNT-NEXT: movzbl %dil, %eax
1585 ; X64-FASTLZCNT-NEXT: lzcntl %eax, %eax
1586 ; X64-FASTLZCNT-NEXT: addl $-24, %eax
1587 ; X64-FASTLZCNT-NEXT: xorb $7, %al
1588 ; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
1589 ; X64-FASTLZCNT-NEXT: retq
1591 ; X86-FASTLZCNT-LABEL: ctlz_xor7_i8_true:
1592 ; X86-FASTLZCNT: # %bb.0:
1593 ; X86-FASTLZCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1594 ; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
1595 ; X86-FASTLZCNT-NEXT: addl $-24, %eax
1596 ; X86-FASTLZCNT-NEXT: xorb $7, %al
1597 ; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
1598 ; X86-FASTLZCNT-NEXT: retl
1599 %clz = call i8 @llvm.ctlz.i8(i8 %x, i1 true)
1600 %res = xor i8 %clz, 7
; ctlz_xor7_i8_false: ctlz.i8 with i1 false (a zero input is well defined),
; XOR'ed with 7.
; Runs without LZCNT test the input for zero and branch. The non-zero path is
; bsrl, `xorl $7`, `xorb $7`; the zero path loads 8 and applies `xorb $7`.
; Runs with LZCNT have no branch and use lzcntl, `addl $-24`, `xorb $7`.
1604 define i8 @ctlz_xor7_i8_false(i8 %x) {
1605 ; X86-LABEL: ctlz_xor7_i8_false:
1607 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1608 ; X86-NEXT: testb %al, %al
1609 ; X86-NEXT: je .LBB28_1
1610 ; X86-NEXT: # %bb.2: # %cond.false
1611 ; X86-NEXT: movzbl %al, %eax
1612 ; X86-NEXT: bsrl %eax, %eax
1613 ; X86-NEXT: xorl $7, %eax
1614 ; X86-NEXT: xorb $7, %al
1615 ; X86-NEXT: # kill: def $al killed $al killed $eax
1617 ; X86-NEXT: .LBB28_1:
1618 ; X86-NEXT: movb $8, %al
1619 ; X86-NEXT: xorb $7, %al
1620 ; X86-NEXT: # kill: def $al killed $al killed $eax
1623 ; X64-LABEL: ctlz_xor7_i8_false:
1625 ; X64-NEXT: testb %dil, %dil
1626 ; X64-NEXT: je .LBB28_1
1627 ; X64-NEXT: # %bb.2: # %cond.false
1628 ; X64-NEXT: movzbl %dil, %eax
1629 ; X64-NEXT: bsrl %eax, %eax
1630 ; X64-NEXT: xorl $7, %eax
1631 ; X64-NEXT: xorb $7, %al
1632 ; X64-NEXT: # kill: def $al killed $al killed $eax
1634 ; X64-NEXT: .LBB28_1:
1635 ; X64-NEXT: movb $8, %al
1636 ; X64-NEXT: xorb $7, %al
1637 ; X64-NEXT: # kill: def $al killed $al killed $eax
1640 ; X86-CLZ-LABEL: ctlz_xor7_i8_false:
1642 ; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1643 ; X86-CLZ-NEXT: lzcntl %eax, %eax
1644 ; X86-CLZ-NEXT: addl $-24, %eax
1645 ; X86-CLZ-NEXT: xorb $7, %al
1646 ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
1647 ; X86-CLZ-NEXT: retl
1649 ; X64-CLZ-LABEL: ctlz_xor7_i8_false:
1651 ; X64-CLZ-NEXT: movzbl %dil, %eax
1652 ; X64-CLZ-NEXT: lzcntl %eax, %eax
1653 ; X64-CLZ-NEXT: addl $-24, %eax
1654 ; X64-CLZ-NEXT: xorb $7, %al
1655 ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
1656 ; X64-CLZ-NEXT: retq
1658 ; X64-FASTLZCNT-LABEL: ctlz_xor7_i8_false:
1659 ; X64-FASTLZCNT: # %bb.0:
1660 ; X64-FASTLZCNT-NEXT: movzbl %dil, %eax
1661 ; X64-FASTLZCNT-NEXT: lzcntl %eax, %eax
1662 ; X64-FASTLZCNT-NEXT: addl $-24, %eax
1663 ; X64-FASTLZCNT-NEXT: xorb $7, %al
1664 ; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
1665 ; X64-FASTLZCNT-NEXT: retq
1667 ; X86-FASTLZCNT-LABEL: ctlz_xor7_i8_false:
1668 ; X86-FASTLZCNT: # %bb.0:
1669 ; X86-FASTLZCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1670 ; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
1671 ; X86-FASTLZCNT-NEXT: addl $-24, %eax
1672 ; X86-FASTLZCNT-NEXT: xorb $7, %al
1673 ; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
1674 ; X86-FASTLZCNT-NEXT: retl
1675 %clz = call i8 @llvm.ctlz.i8(i8 %x, i1 false)
1676 %res = xor i8 %clz, 7
; ctlz_xor15_i16_true: ctlz.i16 with i1 true (a zero input yields poison per
; the LangRef), XOR'ed with 15.
; Expected code: bsrw, with no xor in the visible checks, for every run except
; the +fast-lzcnt ones; those use lzcntw followed by `xorl $15`.
1680 define i16 @ctlz_xor15_i16_true(i16 %x) {
1681 ; X86-LABEL: ctlz_xor15_i16_true:
1683 ; X86-NEXT: bsrw {{[0-9]+}}(%esp), %ax
1686 ; X64-LABEL: ctlz_xor15_i16_true:
1688 ; X64-NEXT: bsrw %di, %ax
1691 ; X86-CLZ-LABEL: ctlz_xor15_i16_true:
1693 ; X86-CLZ-NEXT: bsrw {{[0-9]+}}(%esp), %ax
1694 ; X86-CLZ-NEXT: retl
1696 ; X64-CLZ-LABEL: ctlz_xor15_i16_true:
1698 ; X64-CLZ-NEXT: bsrw %di, %ax
1699 ; X64-CLZ-NEXT: retq
1701 ; X64-FASTLZCNT-LABEL: ctlz_xor15_i16_true:
1702 ; X64-FASTLZCNT: # %bb.0:
1703 ; X64-FASTLZCNT-NEXT: lzcntw %di, %ax
1704 ; X64-FASTLZCNT-NEXT: xorl $15, %eax
1705 ; X64-FASTLZCNT-NEXT: # kill: def $ax killed $ax killed $eax
1706 ; X64-FASTLZCNT-NEXT: retq
1708 ; X86-FASTLZCNT-LABEL: ctlz_xor15_i16_true:
1709 ; X86-FASTLZCNT: # %bb.0:
1710 ; X86-FASTLZCNT-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
1711 ; X86-FASTLZCNT-NEXT: xorl $15, %eax
1712 ; X86-FASTLZCNT-NEXT: # kill: def $ax killed $ax killed $eax
1713 ; X86-FASTLZCNT-NEXT: retl
1714 %clz = call i16 @llvm.ctlz.i16(i16 %x, i1 true)
1715 %res = xor i16 %clz, 15
; ctlz_xor31_i32_false: ctlz.i32 with i1 false (a zero input is well defined),
; XOR'ed with 31.
; Runs without LZCNT test the input for zero and branch. The non-zero path is
; bsrl followed by two `xorl $31`; the zero path loads 32 and applies
; `xorl $31`.
; Runs with LZCNT use lzcntl followed by `xorl $31`.
1719 define i32 @ctlz_xor31_i32_false(i32 %x) {
1720 ; X86-LABEL: ctlz_xor31_i32_false:
1722 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1723 ; X86-NEXT: testl %eax, %eax
1724 ; X86-NEXT: je .LBB30_1
1725 ; X86-NEXT: # %bb.2: # %cond.false
1726 ; X86-NEXT: bsrl %eax, %eax
1727 ; X86-NEXT: xorl $31, %eax
1728 ; X86-NEXT: xorl $31, %eax
1730 ; X86-NEXT: .LBB30_1:
1731 ; X86-NEXT: movl $32, %eax
1732 ; X86-NEXT: xorl $31, %eax
1735 ; X64-LABEL: ctlz_xor31_i32_false:
1737 ; X64-NEXT: testl %edi, %edi
1738 ; X64-NEXT: je .LBB30_1
1739 ; X64-NEXT: # %bb.2: # %cond.false
1740 ; X64-NEXT: bsrl %edi, %eax
1741 ; X64-NEXT: xorl $31, %eax
1742 ; X64-NEXT: xorl $31, %eax
1744 ; X64-NEXT: .LBB30_1:
1745 ; X64-NEXT: movl $32, %eax
1746 ; X64-NEXT: xorl $31, %eax
1749 ; X86-CLZ-LABEL: ctlz_xor31_i32_false:
1751 ; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
1752 ; X86-CLZ-NEXT: xorl $31, %eax
1753 ; X86-CLZ-NEXT: retl
1755 ; X64-CLZ-LABEL: ctlz_xor31_i32_false:
1757 ; X64-CLZ-NEXT: lzcntl %edi, %eax
1758 ; X64-CLZ-NEXT: xorl $31, %eax
1759 ; X64-CLZ-NEXT: retq
1761 ; X64-FASTLZCNT-LABEL: ctlz_xor31_i32_false:
1762 ; X64-FASTLZCNT: # %bb.0:
1763 ; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax
1764 ; X64-FASTLZCNT-NEXT: xorl $31, %eax
1765 ; X64-FASTLZCNT-NEXT: retq
1767 ; X86-FASTLZCNT-LABEL: ctlz_xor31_i32_false:
1768 ; X86-FASTLZCNT: # %bb.0:
1769 ; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
1770 ; X86-FASTLZCNT-NEXT: xorl $31, %eax
1771 ; X86-FASTLZCNT-NEXT: retl
1772 %clz = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
1773 %res = xor i32 %clz, 31
1777 define i64 @ctlz_xor63_i64_true(i64 %x) {
1778 ; X86-NOCMOV-LABEL: ctlz_xor63_i64_true:
1779 ; X86-NOCMOV: # %bb.0:
1780 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
1781 ; X86-NOCMOV-NEXT: testl %eax, %eax
1782 ; X86-NOCMOV-NEXT: jne .LBB31_1
1783 ; X86-NOCMOV-NEXT: # %bb.2:
1784 ; X86-NOCMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
1785 ; X86-NOCMOV-NEXT: xorl $31, %eax
1786 ; X86-NOCMOV-NEXT: addl $32, %eax
1787 ; X86-NOCMOV-NEXT: jmp .LBB31_3
1788 ; X86-NOCMOV-NEXT: .LBB31_1:
1789 ; X86-NOCMOV-NEXT: bsrl %eax, %eax
1790 ; X86-NOCMOV-NEXT: xorl $31, %eax
1791 ; X86-NOCMOV-NEXT: .LBB31_3:
1792 ; X86-NOCMOV-NEXT: xorl $63, %eax
1793 ; X86-NOCMOV-NEXT: xorl %edx, %edx
1794 ; X86-NOCMOV-NEXT: retl
1796 ; X86-CMOV-LABEL: ctlz_xor63_i64_true:
1797 ; X86-CMOV: # %bb.0:
1798 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
1799 ; X86-CMOV-NEXT: bsrl %ecx, %edx
1800 ; X86-CMOV-NEXT: xorl $31, %edx
1801 ; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
1802 ; X86-CMOV-NEXT: xorl $31, %eax
1803 ; X86-CMOV-NEXT: addl $32, %eax
1804 ; X86-CMOV-NEXT: testl %ecx, %ecx
1805 ; X86-CMOV-NEXT: cmovnel %edx, %eax
1806 ; X86-CMOV-NEXT: xorl $63, %eax
1807 ; X86-CMOV-NEXT: xorl %edx, %edx
1808 ; X86-CMOV-NEXT: retl
1810 ; X64-LABEL: ctlz_xor63_i64_true:
1812 ; X64-NEXT: bsrq %rdi, %rax
1815 ; X86-CLZ-LABEL: ctlz_xor63_i64_true:
1817 ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
1818 ; X86-CLZ-NEXT: testl %eax, %eax
1819 ; X86-CLZ-NEXT: jne .LBB31_1
1820 ; X86-CLZ-NEXT: # %bb.2:
1821 ; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
1822 ; X86-CLZ-NEXT: addl $32, %eax
1823 ; X86-CLZ-NEXT: jmp .LBB31_3
1824 ; X86-CLZ-NEXT: .LBB31_1:
1825 ; X86-CLZ-NEXT: lzcntl %eax, %eax
1826 ; X86-CLZ-NEXT: .LBB31_3:
1827 ; X86-CLZ-NEXT: xorl $63, %eax
1828 ; X86-CLZ-NEXT: xorl %edx, %edx
1829 ; X86-CLZ-NEXT: retl
1831 ; X64-CLZ-LABEL: ctlz_xor63_i64_true:
1833 ; X64-CLZ-NEXT: bsrq %rdi, %rax
1834 ; X64-CLZ-NEXT: retq
1836 ; X64-FASTLZCNT-LABEL: ctlz_xor63_i64_true:
1837 ; X64-FASTLZCNT: # %bb.0:
1838 ; X64-FASTLZCNT-NEXT: lzcntq %rdi, %rax
1839 ; X64-FASTLZCNT-NEXT: xorq $63, %rax
1840 ; X64-FASTLZCNT-NEXT: retq
1842 ; X86-FASTLZCNT-LABEL: ctlz_xor63_i64_true:
1843 ; X86-FASTLZCNT: # %bb.0:
1844 ; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
1845 ; X86-FASTLZCNT-NEXT: testl %eax, %eax
1846 ; X86-FASTLZCNT-NEXT: jne .LBB31_1
1847 ; X86-FASTLZCNT-NEXT: # %bb.2:
1848 ; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
1849 ; X86-FASTLZCNT-NEXT: addl $32, %eax
1850 ; X86-FASTLZCNT-NEXT: jmp .LBB31_3
1851 ; X86-FASTLZCNT-NEXT: .LBB31_1:
1852 ; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
1853 ; X86-FASTLZCNT-NEXT: .LBB31_3:
1854 ; X86-FASTLZCNT-NEXT: xorl $63, %eax
1855 ; X86-FASTLZCNT-NEXT: xorl %edx, %edx
1856 ; X86-FASTLZCNT-NEXT: retl
1857 %clz = call i64 @llvm.ctlz.i64(i64 %x, i1 true)
1858 %res = xor i64 %clz, 63