1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64
5 ;; Use cttz to test if we properly prove never-zero. There is a very
6 ;; simple transform from cttz -> cttz_zero_undef if its operand is known never zero.
8 declare i32 @llvm.cttz.i32(i32, i1)
9 declare i32 @llvm.uadd.sat.i32(i32, i32)
10 declare i32 @llvm.umax.i32(i32, i32)
11 declare i32 @llvm.umin.i32(i32, i32)
12 declare i32 @llvm.smin.i32(i32, i32)
13 declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
14 declare i32 @llvm.smax.i32(i32, i32)
15 declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
16 declare i32 @llvm.bswap.i32(i32)
17 declare i32 @llvm.bitreverse.i32(i32)
18 declare i32 @llvm.ctpop.i32(i32)
19 declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
20 declare i32 @llvm.abs.i32(i32, i1)
21 declare i32 @llvm.fshl.i32(i32, i32, i32)
22 declare i32 @llvm.fshr.i32(i32, i32, i32)
;; cttz (with `i1 false`) of a value that, per the checks, has been OR'ed
;; with 1. Expected output on both targets feeds it straight to `rep bsfl`
;; with no zero test and no constant fallback.
24 define i32 @or_known_nonzero(i32 %x) {
25 ; X86-LABEL: or_known_nonzero:
27 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
28 ; X86-NEXT: orl $1, %eax
29 ; X86-NEXT: rep bsfl %eax, %eax
32 ; X64-LABEL: or_known_nonzero:
34 ; X64-NEXT: orl $1, %edi
35 ; X64-NEXT: rep bsfl %edi, %eax
38 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of the OR of the two arguments (per the checks). Nothing makes the
;; result non-zero, so the expected output keeps the guard: branch on the
;; OR's zero flag, `rep bsfl` on the non-zero path, constant 32 otherwise.
42 define i32 @or_maybe_zero(i32 %x, i32 %y) {
43 ; X86-LABEL: or_maybe_zero:
45 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
46 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
47 ; X86-NEXT: je .LBB1_1
48 ; X86-NEXT: # %bb.2: # %cond.false
49 ; X86-NEXT: rep bsfl %eax, %eax
52 ; X86-NEXT: movl $32, %eax
55 ; X64-LABEL: or_maybe_zero:
57 ; X64-NEXT: orl %esi, %edi
58 ; X64-NEXT: je .LBB1_1
59 ; X64-NEXT: # %bb.2: # %cond.false
60 ; X64-NEXT: rep bsfl %edi, %eax
63 ; X64-NEXT: movl $32, %eax
66 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of a select between %y and the constant 122. The checks show the
;; non-constant arm being OR'ed with 1 before the cmov, so both arms are
;; non-zero; expected output has no zero guard before `rep bsfl`.
70 define i32 @select_known_nonzero(i1 %c, i32 %x) {
71 ; X86-LABEL: select_known_nonzero:
73 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
74 ; X86-NEXT: orl $1, %eax
75 ; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
76 ; X86-NEXT: movl $122, %ecx
77 ; X86-NEXT: cmovnel %eax, %ecx
78 ; X86-NEXT: rep bsfl %ecx, %eax
81 ; X64-LABEL: select_known_nonzero:
83 ; X64-NEXT: orl $1, %esi
84 ; X64-NEXT: testb $1, %dil
85 ; X64-NEXT: movl $122, %eax
86 ; X64-NEXT: cmovnel %esi, %eax
87 ; X64-NEXT: rep bsfl %eax, %eax
90 %z = select i1 %c, i32 %y, i32 122
91 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of a select whose false arm is the constant 0, so the result may be
;; zero even though the other arm is OR'ed with 1 (per the checks).
;; Expected output keeps the test/branch and the constant-32 fallback.
95 define i32 @select_maybe_zero(i1 %c, i32 %x) {
96 ; X86-LABEL: select_maybe_zero:
98 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
99 ; X86-NEXT: orl $1, %ecx
100 ; X86-NEXT: xorl %eax, %eax
101 ; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
102 ; X86-NEXT: cmovnel %ecx, %eax
103 ; X86-NEXT: testl %eax, %eax
104 ; X86-NEXT: je .LBB3_1
105 ; X86-NEXT: # %bb.2: # %cond.false
106 ; X86-NEXT: rep bsfl %eax, %eax
109 ; X86-NEXT: movl $32, %eax
112 ; X64-LABEL: select_maybe_zero:
114 ; X64-NEXT: orl $1, %esi
115 ; X64-NEXT: xorl %eax, %eax
116 ; X64-NEXT: testb $1, %dil
117 ; X64-NEXT: cmovnel %esi, %eax
118 ; X64-NEXT: testl %eax, %eax
119 ; X64-NEXT: je .LBB3_1
120 ; X64-NEXT: # %bb.2: # %cond.false
121 ; X64-NEXT: rep bsfl %eax, %eax
124 ; X64-NEXT: movl $32, %eax
127 %z = select i1 %c, i32 %y, i32 0
128 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; Per the checks, the constant 123 (an odd value, i.e. bit 0 set) is
;; shifted left by the argument and the result goes to `rep bsfl` with no
;; zero guard. The IR line defining %z is not visible in this excerpt.
132 define i32 @shl_known_nonzero_1s_bit_set(i32 %x) {
133 ; X86-LABEL: shl_known_nonzero_1s_bit_set:
135 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
136 ; X86-NEXT: movl $123, %eax
137 ; X86-NEXT: shll %cl, %eax
138 ; X86-NEXT: rep bsfl %eax, %eax
141 ; X64-LABEL: shl_known_nonzero_1s_bit_set:
143 ; X64-NEXT: movl %edi, %ecx
144 ; X64-NEXT: movl $123, %eax
145 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
146 ; X64-NEXT: shll %cl, %eax
147 ; X64-NEXT: rep bsfl %eax, %eax
150 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of `shl nsw %y, %x`. The checks show the shifted operand being
;; OR'ed with 256 beforehand; together with the nsw flag the expected
;; output needs no zero guard before `rep bsfl`.
154 define i32 @shl_known_nonzero_nsw(i32 %x, i32 %yy) {
155 ; X86-LABEL: shl_known_nonzero_nsw:
157 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
158 ; X86-NEXT: movl $256, %eax # imm = 0x100
159 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
160 ; X86-NEXT: shll %cl, %eax
161 ; X86-NEXT: rep bsfl %eax, %eax
164 ; X64-LABEL: shl_known_nonzero_nsw:
166 ; X64-NEXT: movl %edi, %ecx
167 ; X64-NEXT: orl $256, %esi # imm = 0x100
168 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
169 ; X64-NEXT: shll %cl, %esi
170 ; X64-NEXT: rep bsfl %esi, %eax
173 %z = shl nsw i32 %y, %x
174 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of `shl nuw %y, %x`. The checks show the shifted operand being
;; OR'ed with 256 beforehand; together with the nuw flag the expected
;; output needs no zero guard before `rep bsfl`.
178 define i32 @shl_known_nonzero_nuw(i32 %x, i32 %yy) {
179 ; X86-LABEL: shl_known_nonzero_nuw:
181 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
182 ; X86-NEXT: movl $256, %eax # imm = 0x100
183 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
184 ; X86-NEXT: shll %cl, %eax
185 ; X86-NEXT: rep bsfl %eax, %eax
188 ; X64-LABEL: shl_known_nonzero_nuw:
190 ; X64-NEXT: movl %edi, %ecx
191 ; X64-NEXT: orl $256, %esi # imm = 0x100
192 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
193 ; X64-NEXT: shll %cl, %esi
194 ; X64-NEXT: rep bsfl %esi, %eax
197 %z = shl nuw i32 %y, %x
198 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of `shl nuw nsw %y, %x` where %y is an unconstrained argument, so
;; the no-wrap flags alone do not rule out zero. Expected output keeps the
;; testl/je guard and the constant-32 fallback.
202 define i32 @shl_maybe_zero(i32 %x, i32 %y) {
203 ; X86-LABEL: shl_maybe_zero:
205 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
206 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
207 ; X86-NEXT: shll %cl, %eax
208 ; X86-NEXT: testl %eax, %eax
209 ; X86-NEXT: je .LBB7_1
210 ; X86-NEXT: # %bb.2: # %cond.false
211 ; X86-NEXT: rep bsfl %eax, %eax
214 ; X86-NEXT: movl $32, %eax
217 ; X64-LABEL: shl_maybe_zero:
219 ; X64-NEXT: movl %edi, %ecx
220 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
221 ; X64-NEXT: shll %cl, %esi
222 ; X64-NEXT: testl %esi, %esi
223 ; X64-NEXT: je .LBB7_1
224 ; X64-NEXT: # %bb.2: # %cond.false
225 ; X64-NEXT: rep bsfl %esi, %eax
228 ; X64-NEXT: movl $32, %eax
230 %z = shl nuw nsw i32 %y, %x
231 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of uadd.sat(%x, 1). The saturating add is expected to lower to
;; incl plus a cmov that selects -1 on wrap, followed by `rep bsfl` with no
;; zero guard.
235 define i32 @uaddsat_known_nonzero(i32 %x) {
236 ; X86-LABEL: uaddsat_known_nonzero:
238 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
239 ; X86-NEXT: incl %eax
240 ; X86-NEXT: movl $-1, %ecx
241 ; X86-NEXT: cmovnel %eax, %ecx
242 ; X86-NEXT: rep bsfl %ecx, %eax
245 ; X64-LABEL: uaddsat_known_nonzero:
247 ; X64-NEXT: incl %edi
248 ; X64-NEXT: movl $-1, %eax
249 ; X64-NEXT: cmovnel %edi, %eax
250 ; X64-NEXT: rep bsfl %eax, %eax
252 %z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 1)
253 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of uadd.sat(%x, %y) with both operands unconstrained (0 + 0 is
;; zero). Expected output keeps the testl/je guard and the constant-32
;; fallback after the add/cmov saturation sequence.
257 define i32 @uaddsat_maybe_zero(i32 %x, i32 %y) {
258 ; X86-LABEL: uaddsat_maybe_zero:
260 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
261 ; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
262 ; X86-NEXT: movl $-1, %eax
263 ; X86-NEXT: cmovael %ecx, %eax
264 ; X86-NEXT: testl %eax, %eax
265 ; X86-NEXT: je .LBB9_1
266 ; X86-NEXT: # %bb.2: # %cond.false
267 ; X86-NEXT: rep bsfl %eax, %eax
270 ; X86-NEXT: movl $32, %eax
273 ; X64-LABEL: uaddsat_maybe_zero:
275 ; X64-NEXT: addl %esi, %edi
276 ; X64-NEXT: movl $-1, %eax
277 ; X64-NEXT: cmovael %edi, %eax
278 ; X64-NEXT: testl %eax, %eax
279 ; X64-NEXT: je .LBB9_1
280 ; X64-NEXT: # %bb.2: # %cond.false
281 ; X64-NEXT: rep bsfl %eax, %eax
284 ; X64-NEXT: movl $32, %eax
286 %z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y)
287 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of umax(%x, %yy) where %yy = `shl nuw 4, %y`. One non-zero operand
;; is enough for an unsigned max, so the expected output goes from the
;; cmp/cmov straight to `rep bsfl` with no zero guard.
291 define i32 @umax_known_nonzero(i32 %x, i32 %y) {
292 ; X86-LABEL: umax_known_nonzero:
294 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
295 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
296 ; X86-NEXT: movl $4, %edx
297 ; X86-NEXT: shll %cl, %edx
298 ; X86-NEXT: cmpl %edx, %eax
299 ; X86-NEXT: cmoval %eax, %edx
300 ; X86-NEXT: rep bsfl %edx, %eax
303 ; X64-LABEL: umax_known_nonzero:
305 ; X64-NEXT: movl %esi, %ecx
306 ; X64-NEXT: movl $4, %eax
307 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
308 ; X64-NEXT: shll %cl, %eax
309 ; X64-NEXT: cmpl %eax, %edi
310 ; X64-NEXT: cmoval %edi, %eax
311 ; X64-NEXT: rep bsfl %eax, %eax
313 %yy = shl nuw i32 4, %y
314 %z = call i32 @llvm.umax.i32(i32 %x, i32 %yy)
315 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of umax(%x, %y) with both operands unconstrained, so the result
;; may be zero. Expected output keeps the testl/je guard and the
;; constant-32 fallback.
319 define i32 @umax_maybe_zero(i32 %x, i32 %y) {
320 ; X86-LABEL: umax_maybe_zero:
322 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
323 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
324 ; X86-NEXT: cmpl %eax, %ecx
325 ; X86-NEXT: cmoval %ecx, %eax
326 ; X86-NEXT: testl %eax, %eax
327 ; X86-NEXT: je .LBB11_1
328 ; X86-NEXT: # %bb.2: # %cond.false
329 ; X86-NEXT: rep bsfl %eax, %eax
331 ; X86-NEXT: .LBB11_1:
332 ; X86-NEXT: movl $32, %eax
335 ; X64-LABEL: umax_maybe_zero:
337 ; X64-NEXT: cmpl %esi, %edi
338 ; X64-NEXT: cmoval %edi, %esi
339 ; X64-NEXT: testl %esi, %esi
340 ; X64-NEXT: je .LBB11_1
341 ; X64-NEXT: # %bb.2: # %cond.false
342 ; X64-NEXT: rep bsfl %esi, %eax
344 ; X64-NEXT: .LBB11_1:
345 ; X64-NEXT: movl $32, %eax
347 %z = call i32 @llvm.umax.i32(i32 %x, i32 %y)
348 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of umin(%x, %y) where %x = `shl nuw 4, %xx` and
;; %y = `add nuw nsw %yy, 4`. Both operands are built to be non-zero, so
;; the expected output has no zero guard before `rep bsfl`.
352 define i32 @umin_known_nonzero(i32 %xx, i32 %yy) {
353 ; X86-LABEL: umin_known_nonzero:
355 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
356 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
357 ; X86-NEXT: movl $4, %edx
358 ; X86-NEXT: shll %cl, %edx
359 ; X86-NEXT: addl $4, %eax
360 ; X86-NEXT: cmpl %eax, %edx
361 ; X86-NEXT: cmovbl %edx, %eax
362 ; X86-NEXT: rep bsfl %eax, %eax
365 ; X64-LABEL: umin_known_nonzero:
367 ; X64-NEXT: movl %edi, %ecx
368 ; X64-NEXT: movl $4, %eax
369 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
370 ; X64-NEXT: shll %cl, %eax
371 ; X64-NEXT: addl $4, %esi
372 ; X64-NEXT: cmpl %esi, %eax
373 ; X64-NEXT: cmovbl %eax, %esi
374 ; X64-NEXT: rep bsfl %esi, %eax
376 %x = shl nuw i32 4, %xx
377 %y = add nuw nsw i32 %yy, 4
378 %z = call i32 @llvm.umin.i32(i32 %x, i32 %y)
379 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of umin(%x, 54). A non-zero constant does not help an unsigned min
;; (%x may be 0), so the expected output keeps the testl/je guard and the
;; constant-32 fallback.
383 define i32 @umin_maybe_zero(i32 %x, i32 %y) {
384 ; X86-LABEL: umin_maybe_zero:
386 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
387 ; X86-NEXT: cmpl $54, %ecx
388 ; X86-NEXT: movl $54, %eax
389 ; X86-NEXT: cmovbl %ecx, %eax
390 ; X86-NEXT: testl %eax, %eax
391 ; X86-NEXT: je .LBB13_1
392 ; X86-NEXT: # %bb.2: # %cond.false
393 ; X86-NEXT: rep bsfl %eax, %eax
395 ; X86-NEXT: .LBB13_1:
396 ; X86-NEXT: movl $32, %eax
399 ; X64-LABEL: umin_maybe_zero:
401 ; X64-NEXT: cmpl $54, %edi
402 ; X64-NEXT: movl $54, %eax
403 ; X64-NEXT: cmovbl %edi, %eax
404 ; X64-NEXT: testl %eax, %eax
405 ; X64-NEXT: je .LBB13_1
406 ; X64-NEXT: # %bb.2: # %cond.false
407 ; X64-NEXT: rep bsfl %eax, %eax
409 ; X64-NEXT: .LBB13_1:
410 ; X64-NEXT: movl $32, %eax
412 %z = call i32 @llvm.umin.i32(i32 %x, i32 54)
413 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of smin(%x, %y) where %x = `shl nuw 4, %xx` and
;; %y = `add nuw nsw %yy, 4`. Expected output goes from the signed
;; cmp/cmov straight to `rep bsfl` with no zero guard.
417 define i32 @smin_known_nonzero(i32 %xx, i32 %yy) {
418 ; X86-LABEL: smin_known_nonzero:
420 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
421 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
422 ; X86-NEXT: movl $4, %edx
423 ; X86-NEXT: shll %cl, %edx
424 ; X86-NEXT: addl $4, %eax
425 ; X86-NEXT: cmpl %eax, %edx
426 ; X86-NEXT: cmovll %edx, %eax
427 ; X86-NEXT: rep bsfl %eax, %eax
430 ; X64-LABEL: smin_known_nonzero:
432 ; X64-NEXT: movl %edi, %ecx
433 ; X64-NEXT: movl $4, %eax
434 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
435 ; X64-NEXT: shll %cl, %eax
436 ; X64-NEXT: addl $4, %esi
437 ; X64-NEXT: cmpl %esi, %eax
438 ; X64-NEXT: cmovll %eax, %esi
439 ; X64-NEXT: rep bsfl %esi, %eax
441 %x = shl nuw i32 4, %xx
442 %y = add nuw nsw i32 %yy, 4
443 %z = call i32 @llvm.smin.i32(i32 %x, i32 %y)
444 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of smin(%x, -54). A signed min with a negative constant is always
;; negative, hence non-zero; the expected output has no zero guard before
;; `rep bsfl`.
;; Review note - the "known_zero" in the name does not match this
;; behaviour; presumably it means "known non-zero". Confirm before renaming.
448 define i32 @smin_known_zero(i32 %x, i32 %y) {
449 ; X86-LABEL: smin_known_zero:
451 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
452 ; X86-NEXT: cmpl $-54, %eax
453 ; X86-NEXT: movl $-54, %ecx
454 ; X86-NEXT: cmovll %eax, %ecx
455 ; X86-NEXT: rep bsfl %ecx, %eax
458 ; X64-LABEL: smin_known_zero:
460 ; X64-NEXT: cmpl $-54, %edi
461 ; X64-NEXT: movl $-54, %eax
462 ; X64-NEXT: cmovll %edi, %eax
463 ; X64-NEXT: rep bsfl %eax, %eax
465 %z = call i32 @llvm.smin.i32(i32 %x, i32 -54)
466 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; Vector variant using a different probe: computes
;; zext(ctpop(smin(%x, <-54,-23,-12,-1>)) == 1). Every lane of the smin is
;; negative, hence non-zero, and the expected lowering is the short
;; power-of-two test (v & (v - 1)) == 0 with no separate v != 0 check.
470 define <4 x i32> @smin_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
471 ; X86-LABEL: smin_known_zero_vec:
473 ; X86-NEXT: movdqa {{.*#+}} xmm1 = [4294967242,4294967273,4294967284,4294967295]
474 ; X86-NEXT: movdqa %xmm1, %xmm2
475 ; X86-NEXT: pcmpgtd %xmm0, %xmm2
476 ; X86-NEXT: pand %xmm2, %xmm0
477 ; X86-NEXT: pandn %xmm1, %xmm2
478 ; X86-NEXT: por %xmm2, %xmm0
479 ; X86-NEXT: pcmpeqd %xmm1, %xmm1
480 ; X86-NEXT: paddd %xmm0, %xmm1
481 ; X86-NEXT: pand %xmm1, %xmm0
482 ; X86-NEXT: pxor %xmm1, %xmm1
483 ; X86-NEXT: pcmpeqd %xmm1, %xmm0
484 ; X86-NEXT: psrld $31, %xmm0
487 ; X64-LABEL: smin_known_zero_vec:
489 ; X64-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
490 ; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
491 ; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm1
492 ; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
493 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
494 ; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
495 ; X64-NEXT: vpsrld $31, %xmm0, %xmm0
497 %z = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> <i32 -54, i32 -23, i32 -12, i32 -1>)
498 %r = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %z)
499 %3 = icmp eq <4 x i32> %r, <i32 1, i32 1, i32 1, i32 1>
500 %ret = zext <4 x i1> %3 to <4 x i32>
;; cttz of smin(%x, 54). With a positive constant the signed min can still
;; be zero (%x = 0), so the expected output keeps the testl/je guard and
;; the constant-32 fallback.
504 define i32 @smin_maybe_zero(i32 %x, i32 %y) {
505 ; X86-LABEL: smin_maybe_zero:
507 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
508 ; X86-NEXT: cmpl $54, %ecx
509 ; X86-NEXT: movl $54, %eax
510 ; X86-NEXT: cmovll %ecx, %eax
511 ; X86-NEXT: testl %eax, %eax
512 ; X86-NEXT: je .LBB17_1
513 ; X86-NEXT: # %bb.2: # %cond.false
514 ; X86-NEXT: rep bsfl %eax, %eax
516 ; X86-NEXT: .LBB17_1:
517 ; X86-NEXT: movl $32, %eax
520 ; X64-LABEL: smin_maybe_zero:
522 ; X64-NEXT: cmpl $54, %edi
523 ; X64-NEXT: movl $54, %eax
524 ; X64-NEXT: cmovll %edi, %eax
525 ; X64-NEXT: testl %eax, %eax
526 ; X64-NEXT: je .LBB17_1
527 ; X64-NEXT: # %bb.2: # %cond.false
528 ; X64-NEXT: rep bsfl %eax, %eax
530 ; X64-NEXT: .LBB17_1:
531 ; X64-NEXT: movl $32, %eax
533 %z = call i32 @llvm.smin.i32(i32 %x, i32 54)
534 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of smax(%x, %y) where %x = `shl nuw 4, %xx` and
;; %y = `add nuw nsw %yy, 4`. Expected output goes from the signed
;; cmp/cmov straight to `rep bsfl` with no zero guard.
538 define i32 @smax_known_nonzero(i32 %xx, i32 %yy) {
539 ; X86-LABEL: smax_known_nonzero:
541 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
542 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
543 ; X86-NEXT: movl $4, %edx
544 ; X86-NEXT: shll %cl, %edx
545 ; X86-NEXT: addl $4, %eax
546 ; X86-NEXT: cmpl %eax, %edx
547 ; X86-NEXT: cmovgl %edx, %eax
548 ; X86-NEXT: rep bsfl %eax, %eax
551 ; X64-LABEL: smax_known_nonzero:
553 ; X64-NEXT: movl %edi, %ecx
554 ; X64-NEXT: movl $4, %eax
555 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
556 ; X64-NEXT: shll %cl, %eax
557 ; X64-NEXT: addl $4, %esi
558 ; X64-NEXT: cmpl %esi, %eax
559 ; X64-NEXT: cmovgl %eax, %esi
560 ; X64-NEXT: rep bsfl %esi, %eax
562 %x = shl nuw i32 4, %xx
563 %y = add nuw nsw i32 %yy, 4
564 %z = call i32 @llvm.smax.i32(i32 %x, i32 %y)
565 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of smax(%x, 54). The signed max is at least 54, hence non-zero,
;; and the expected output has no zero guard before `rep bsfl`.
;; Review note - the "maybe_zero" in the name does not match the expected
;; output; presumably the name predates the codegen improvement. Confirm
;; before renaming.
569 define i32 @smax_maybe_zero(i32 %x, i32 %y) {
570 ; X86-LABEL: smax_maybe_zero:
572 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
573 ; X86-NEXT: cmpl $55, %eax
574 ; X86-NEXT: movl $54, %ecx
575 ; X86-NEXT: cmovgel %eax, %ecx
576 ; X86-NEXT: rep bsfl %ecx, %eax
579 ; X64-LABEL: smax_maybe_zero:
581 ; X64-NEXT: cmpl $55, %edi
582 ; X64-NEXT: movl $54, %eax
583 ; X64-NEXT: cmovgel %edi, %eax
584 ; X64-NEXT: rep bsfl %eax, %eax
586 %z = call i32 @llvm.smax.i32(i32 %x, i32 54)
587 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; Vector variant using the ctpop probe: computes
;; zext(ctpop(smax(%x, <54,23,12,1>)) == 1). Unlike the smin vector test,
;; the expected lowering here is the longer form that compares
;; v ^ (v - 1) against v - 1 as unsigned values (sign-bias pxor + pcmpgtd
;; on i686/SSE2, vpminud + vpcmpeqd + vpandn on x86-64/AVX).
591 define <4 x i32> @smax_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
592 ; X86-LABEL: smax_known_zero_vec:
594 ; X86-NEXT: movdqa {{.*#+}} xmm1 = [54,23,12,1]
595 ; X86-NEXT: movdqa %xmm0, %xmm2
596 ; X86-NEXT: pcmpgtd %xmm1, %xmm2
597 ; X86-NEXT: pand %xmm2, %xmm0
598 ; X86-NEXT: pandn %xmm1, %xmm2
599 ; X86-NEXT: por %xmm2, %xmm0
600 ; X86-NEXT: pcmpeqd %xmm1, %xmm1
601 ; X86-NEXT: paddd %xmm0, %xmm1
602 ; X86-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
603 ; X86-NEXT: pxor %xmm1, %xmm0
604 ; X86-NEXT: pcmpgtd %xmm1, %xmm0
605 ; X86-NEXT: psrld $31, %xmm0
608 ; X64-LABEL: smax_known_zero_vec:
610 ; X64-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
611 ; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
612 ; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm1
613 ; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0
614 ; X64-NEXT: vpminud %xmm1, %xmm0, %xmm1
615 ; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
616 ; X64-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
618 %z = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> <i32 54, i32 23, i32 12, i32 1>)
619 %r = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %z)
620 %3 = icmp eq <4 x i32> %r, <i32 1, i32 1, i32 1, i32 1>
621 %ret = zext <4 x i1> %3 to <4 x i32>
;; cttz of smax(%x, -1). The signed max with -1 can be zero (%x = 0), and
;; the expected output keeps the testl/je guard and the constant-32
;; fallback after the test/cmovns sequence.
625 define i32 @smax_known_zero(i32 %x, i32 %y) {
626 ; X86-LABEL: smax_known_zero:
628 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
629 ; X86-NEXT: testl %ecx, %ecx
630 ; X86-NEXT: movl $-1, %eax
631 ; X86-NEXT: cmovnsl %ecx, %eax
632 ; X86-NEXT: testl %eax, %eax
633 ; X86-NEXT: je .LBB21_1
634 ; X86-NEXT: # %bb.2: # %cond.false
635 ; X86-NEXT: rep bsfl %eax, %eax
637 ; X86-NEXT: .LBB21_1:
638 ; X86-NEXT: movl $32, %eax
641 ; X64-LABEL: smax_known_zero:
643 ; X64-NEXT: testl %edi, %edi
644 ; X64-NEXT: movl $-1, %eax
645 ; X64-NEXT: cmovnsl %edi, %eax
646 ; X64-NEXT: testl %eax, %eax
647 ; X64-NEXT: je .LBB21_1
648 ; X64-NEXT: # %bb.2: # %cond.false
649 ; X64-NEXT: rep bsfl %eax, %eax
651 ; X64-NEXT: .LBB21_1:
652 ; X64-NEXT: movl $32, %eax
654 %z = call i32 @llvm.smax.i32(i32 %x, i32 -1)
655 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of a rotate-right spelled as the lshr / sub 32 / shl / or idiom
;; (matched to `rorl` in the checks). The checks show the rotated value
;; being OR'ed with 256 first.
;; Review note - despite the "known_nonzero" name, the expected output
;; still contains the testl/je guard and the constant-32 fallback, i.e. it
;; records a missed optimisation for this spelling of the rotate.
659 define i32 @rotr_known_nonzero(i32 %xx, i32 %y) {
660 ; X86-LABEL: rotr_known_nonzero:
662 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
663 ; X86-NEXT: movl $256, %eax # imm = 0x100
664 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
665 ; X86-NEXT: rorl %cl, %eax
666 ; X86-NEXT: testl %eax, %eax
667 ; X86-NEXT: je .LBB22_1
668 ; X86-NEXT: # %bb.2: # %cond.false
669 ; X86-NEXT: rep bsfl %eax, %eax
671 ; X86-NEXT: .LBB22_1:
672 ; X86-NEXT: movl $32, %eax
675 ; X64-LABEL: rotr_known_nonzero:
677 ; X64-NEXT: movl %esi, %ecx
678 ; X64-NEXT: orl $256, %edi # imm = 0x100
679 ; X64-NEXT: movl %edi, %eax
680 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
681 ; X64-NEXT: rorl %cl, %eax
682 ; X64-NEXT: testl %edi, %edi
683 ; X64-NEXT: je .LBB22_1
684 ; X64-NEXT: # %bb.2: # %cond.false
685 ; X64-NEXT: rep bsfl %eax, %eax
687 ; X64-NEXT: .LBB22_1:
688 ; X64-NEXT: movl $32, %eax
691 %shr = lshr i32 %x, %y
692 %sub = sub i32 32, %y
693 %shl = shl i32 %x, %sub
694 %z = or i32 %shl, %shr
695 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of a rotate-right (lshr / sub 32 / shl / or idiom) of an
;; unconstrained argument. Expected output keeps the testl/je guard and
;; the constant-32 fallback around `rep bsfl`.
699 define i32 @rotr_maybe_zero(i32 %x, i32 %y) {
700 ; X86-LABEL: rotr_maybe_zero:
702 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
703 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
704 ; X86-NEXT: rorl %cl, %eax
705 ; X86-NEXT: testl %eax, %eax
706 ; X86-NEXT: je .LBB23_1
707 ; X86-NEXT: # %bb.2: # %cond.false
708 ; X86-NEXT: rep bsfl %eax, %eax
710 ; X86-NEXT: .LBB23_1:
711 ; X86-NEXT: movl $32, %eax
714 ; X64-LABEL: rotr_maybe_zero:
716 ; X64-NEXT: movl %esi, %ecx
717 ; X64-NEXT: movl %edi, %eax
718 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
719 ; X64-NEXT: rorl %cl, %eax
720 ; X64-NEXT: testl %edi, %edi
721 ; X64-NEXT: je .LBB23_1
722 ; X64-NEXT: # %bb.2: # %cond.false
723 ; X64-NEXT: rep bsfl %eax, %eax
725 ; X64-NEXT: .LBB23_1:
726 ; X64-NEXT: movl $32, %eax
728 %shr = lshr i32 %x, %y
729 %sub = sub i32 32, %y
730 %shl = shl i32 %x, %sub
731 %z = or i32 %shl, %shr
732 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of a rotate-right expressed as fshr(%x, %x, %y). The checks show
;; the rotated value being OR'ed with 256 first; with the intrinsic form
;; the expected output has no zero guard before `rep bsfl`.
736 define i32 @rotr_with_fshr_known_nonzero(i32 %xx, i32 %y) {
737 ; X86-LABEL: rotr_with_fshr_known_nonzero:
739 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
740 ; X86-NEXT: movl $256, %eax # imm = 0x100
741 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
742 ; X86-NEXT: rorl %cl, %eax
743 ; X86-NEXT: rep bsfl %eax, %eax
746 ; X64-LABEL: rotr_with_fshr_known_nonzero:
748 ; X64-NEXT: movl %esi, %ecx
749 ; X64-NEXT: orl $256, %edi # imm = 0x100
750 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
751 ; X64-NEXT: rorl %cl, %edi
752 ; X64-NEXT: rep bsfl %edi, %eax
755 %z = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %y)
756 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of fshr(%x, %x, %y), a rotate-right of an unconstrained argument.
;; Expected output keeps the testl/je guard and the constant-32 fallback
;; around `rep bsfl`.
760 define i32 @rotr_with_fshr_maybe_zero(i32 %x, i32 %y) {
761 ; X86-LABEL: rotr_with_fshr_maybe_zero:
763 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
764 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
765 ; X86-NEXT: rorl %cl, %eax
766 ; X86-NEXT: testl %eax, %eax
767 ; X86-NEXT: je .LBB25_1
768 ; X86-NEXT: # %bb.2: # %cond.false
769 ; X86-NEXT: rep bsfl %eax, %eax
771 ; X86-NEXT: .LBB25_1:
772 ; X86-NEXT: movl $32, %eax
775 ; X64-LABEL: rotr_with_fshr_maybe_zero:
777 ; X64-NEXT: movl %esi, %ecx
778 ; X64-NEXT: movl %edi, %eax
779 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
780 ; X64-NEXT: rorl %cl, %eax
781 ; X64-NEXT: testl %edi, %edi
782 ; X64-NEXT: je .LBB25_1
783 ; X64-NEXT: # %bb.2: # %cond.false
784 ; X64-NEXT: rep bsfl %eax, %eax
786 ; X64-NEXT: .LBB25_1:
787 ; X64-NEXT: movl $32, %eax
789 %z = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %y)
790 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of a rotate-left spelled as the shl / sub 32 / lshr / or idiom
;; (matched to `roll` in the checks). The checks show the rotated value
;; being OR'ed with 256 first.
;; Review note - despite the "known_nonzero" name, the expected output
;; still contains the testl/je guard and the constant-32 fallback, i.e. it
;; records a missed optimisation for this spelling of the rotate.
794 define i32 @rotl_known_nonzero(i32 %xx, i32 %y) {
795 ; X86-LABEL: rotl_known_nonzero:
797 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
798 ; X86-NEXT: movl $256, %eax # imm = 0x100
799 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
800 ; X86-NEXT: roll %cl, %eax
801 ; X86-NEXT: testl %eax, %eax
802 ; X86-NEXT: je .LBB26_1
803 ; X86-NEXT: # %bb.2: # %cond.false
804 ; X86-NEXT: rep bsfl %eax, %eax
806 ; X86-NEXT: .LBB26_1:
807 ; X86-NEXT: movl $32, %eax
810 ; X64-LABEL: rotl_known_nonzero:
812 ; X64-NEXT: movl %esi, %ecx
813 ; X64-NEXT: orl $256, %edi # imm = 0x100
814 ; X64-NEXT: movl %edi, %eax
815 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
816 ; X64-NEXT: roll %cl, %eax
817 ; X64-NEXT: testl %edi, %edi
818 ; X64-NEXT: je .LBB26_1
819 ; X64-NEXT: # %bb.2: # %cond.false
820 ; X64-NEXT: rep bsfl %eax, %eax
822 ; X64-NEXT: .LBB26_1:
823 ; X64-NEXT: movl $32, %eax
826 %shl = shl i32 %x, %y
827 %sub = sub i32 32, %y
828 %shr = lshr i32 %x, %sub
829 %z = or i32 %shr, %shl
830 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of a rotate-left (shl / sub 32 / lshr / or idiom) of an
;; unconstrained argument. Expected output keeps the testl/je guard and
;; the constant-32 fallback around `rep bsfl`.
834 define i32 @rotl_maybe_zero(i32 %x, i32 %y) {
835 ; X86-LABEL: rotl_maybe_zero:
837 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
838 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
839 ; X86-NEXT: roll %cl, %eax
840 ; X86-NEXT: testl %eax, %eax
841 ; X86-NEXT: je .LBB27_1
842 ; X86-NEXT: # %bb.2: # %cond.false
843 ; X86-NEXT: rep bsfl %eax, %eax
845 ; X86-NEXT: .LBB27_1:
846 ; X86-NEXT: movl $32, %eax
849 ; X64-LABEL: rotl_maybe_zero:
851 ; X64-NEXT: movl %esi, %ecx
852 ; X64-NEXT: movl %edi, %eax
853 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
854 ; X64-NEXT: roll %cl, %eax
855 ; X64-NEXT: testl %edi, %edi
856 ; X64-NEXT: je .LBB27_1
857 ; X64-NEXT: # %bb.2: # %cond.false
858 ; X64-NEXT: rep bsfl %eax, %eax
860 ; X64-NEXT: .LBB27_1:
861 ; X64-NEXT: movl $32, %eax
863 %shl = shl i32 %x, %y
864 %sub = sub i32 32, %y
865 %shr = lshr i32 %x, %sub
866 %z = or i32 %shr, %shl
867 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of a rotate-left expressed as fshl(%x, %x, %y). The checks show
;; the rotated value being OR'ed with 256 first; with the intrinsic form
;; the expected output has no zero guard before `rep bsfl`.
871 define i32 @rotl_with_fshl_known_nonzero(i32 %xx, i32 %y) {
872 ; X86-LABEL: rotl_with_fshl_known_nonzero:
874 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
875 ; X86-NEXT: movl $256, %eax # imm = 0x100
876 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
877 ; X86-NEXT: roll %cl, %eax
878 ; X86-NEXT: rep bsfl %eax, %eax
881 ; X64-LABEL: rotl_with_fshl_known_nonzero:
883 ; X64-NEXT: movl %esi, %ecx
884 ; X64-NEXT: orl $256, %edi # imm = 0x100
885 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
886 ; X64-NEXT: roll %cl, %edi
887 ; X64-NEXT: rep bsfl %edi, %eax
890 %z = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
891 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of fshl(%x, %x, %y), a rotate-left of an unconstrained argument.
;; Expected output keeps the testl/je guard and the constant-32 fallback
;; around `rep bsfl`.
895 define i32 @rotl_with_fshl_maybe_zero(i32 %x, i32 %y) {
896 ; X86-LABEL: rotl_with_fshl_maybe_zero:
898 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
899 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
900 ; X86-NEXT: roll %cl, %eax
901 ; X86-NEXT: testl %eax, %eax
902 ; X86-NEXT: je .LBB29_1
903 ; X86-NEXT: # %bb.2: # %cond.false
904 ; X86-NEXT: rep bsfl %eax, %eax
906 ; X86-NEXT: .LBB29_1:
907 ; X86-NEXT: movl $32, %eax
910 ; X64-LABEL: rotl_with_fshl_maybe_zero:
912 ; X64-NEXT: movl %esi, %ecx
913 ; X64-NEXT: movl %edi, %eax
914 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
915 ; X64-NEXT: roll %cl, %eax
916 ; X64-NEXT: testl %edi, %edi
917 ; X64-NEXT: je .LBB29_1
918 ; X64-NEXT: # %bb.2: # %cond.false
919 ; X64-NEXT: rep bsfl %eax, %eax
921 ; X64-NEXT: .LBB29_1:
922 ; X64-NEXT: movl $32, %eax
924 %z = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
925 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of `ashr 2147606891, %x`. The constant is 0x8001E16B, which has
;; the sign bit set, and an arithmetic shift keeps replicating that bit.
;; Expected output has no zero guard before `rep bsfl`.
929 define i32 @sra_known_nonzero_sign_bit_set(i32 %x) {
930 ; X86-LABEL: sra_known_nonzero_sign_bit_set:
932 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
933 ; X86-NEXT: movl $-2147360405, %eax # imm = 0x8001E16B
934 ; X86-NEXT: sarl %cl, %eax
935 ; X86-NEXT: rep bsfl %eax, %eax
938 ; X64-LABEL: sra_known_nonzero_sign_bit_set:
940 ; X64-NEXT: movl %edi, %ecx
941 ; X64-NEXT: movl $-2147360405, %eax # imm = 0x8001E16B
942 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
943 ; X64-NEXT: sarl %cl, %eax
944 ; X64-NEXT: rep bsfl %eax, %eax
946 %z = ashr i32 2147606891, %x
947 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of `ashr exact %y, %x`. The checks show the shifted operand being
;; OR'ed with 256 first; `exact` means no set bits are shifted out, so the
;; expected output has no zero guard before `rep bsfl`.
951 define i32 @sra_known_nonzero_exact(i32 %x, i32 %yy) {
952 ; X86-LABEL: sra_known_nonzero_exact:
954 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
955 ; X86-NEXT: movl $256, %eax # imm = 0x100
956 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
957 ; X86-NEXT: sarl %cl, %eax
958 ; X86-NEXT: rep bsfl %eax, %eax
961 ; X64-LABEL: sra_known_nonzero_exact:
963 ; X64-NEXT: movl %edi, %ecx
964 ; X64-NEXT: orl $256, %esi # imm = 0x100
965 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
966 ; X64-NEXT: sarl %cl, %esi
967 ; X64-NEXT: rep bsfl %esi, %eax
970 %z = ashr exact i32 %y, %x
971 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of `ashr exact %y, %x` where %y is an unconstrained argument, so
;; `exact` alone does not rule out zero. Expected output keeps the
;; testl/je guard and the constant-32 fallback.
975 define i32 @sra_maybe_zero(i32 %x, i32 %y) {
976 ; X86-LABEL: sra_maybe_zero:
978 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
979 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
980 ; X86-NEXT: sarl %cl, %eax
981 ; X86-NEXT: testl %eax, %eax
982 ; X86-NEXT: je .LBB32_1
983 ; X86-NEXT: # %bb.2: # %cond.false
984 ; X86-NEXT: rep bsfl %eax, %eax
986 ; X86-NEXT: .LBB32_1:
987 ; X86-NEXT: movl $32, %eax
990 ; X64-LABEL: sra_maybe_zero:
992 ; X64-NEXT: movl %edi, %ecx
993 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
994 ; X64-NEXT: sarl %cl, %esi
995 ; X64-NEXT: testl %esi, %esi
996 ; X64-NEXT: je .LBB32_1
997 ; X64-NEXT: # %bb.2: # %cond.false
998 ; X64-NEXT: rep bsfl %esi, %eax
1000 ; X64-NEXT: .LBB32_1:
1001 ; X64-NEXT: movl $32, %eax
1003 %z = ashr exact i32 %y, %x
1004 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of `lshr 2147606891, %x`. The constant is 0x8001E16B, which has
;; the top bit set. Expected output has no zero guard between `shrl` and
;; `rep bsfl`.
1008 define i32 @srl_known_nonzero_sign_bit_set(i32 %x) {
1009 ; X86-LABEL: srl_known_nonzero_sign_bit_set:
1011 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1012 ; X86-NEXT: movl $-2147360405, %eax # imm = 0x8001E16B
1013 ; X86-NEXT: shrl %cl, %eax
1014 ; X86-NEXT: rep bsfl %eax, %eax
1017 ; X64-LABEL: srl_known_nonzero_sign_bit_set:
1019 ; X64-NEXT: movl %edi, %ecx
1020 ; X64-NEXT: movl $-2147360405, %eax # imm = 0x8001E16B
1021 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
1022 ; X64-NEXT: shrl %cl, %eax
1023 ; X64-NEXT: rep bsfl %eax, %eax
1025 %z = lshr i32 2147606891, %x
1026 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of `lshr exact %y, %x` with %y = `or %yy, 256`. %y is non-zero and
;; `exact` means no set bits are shifted out, so the expected output has
;; no zero guard before `rep bsfl`.
1030 define i32 @srl_known_nonzero_exact(i32 %x, i32 %yy) {
1031 ; X86-LABEL: srl_known_nonzero_exact:
1033 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1034 ; X86-NEXT: movl $256, %eax # imm = 0x100
1035 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
1036 ; X86-NEXT: shrl %cl, %eax
1037 ; X86-NEXT: rep bsfl %eax, %eax
1040 ; X64-LABEL: srl_known_nonzero_exact:
1042 ; X64-NEXT: movl %edi, %ecx
1043 ; X64-NEXT: orl $256, %esi # imm = 0x100
1044 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
1045 ; X64-NEXT: shrl %cl, %esi
1046 ; X64-NEXT: rep bsfl %esi, %eax
1048 %y = or i32 %yy, 256
1049 %z = lshr exact i32 %y, %x
1050 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of `lshr exact %y, %x` where %y is an unconstrained argument, so
;; `exact` alone does not rule out zero. Expected output keeps the
;; testl/je guard and the constant-32 fallback.
1054 define i32 @srl_maybe_zero(i32 %x, i32 %y) {
1055 ; X86-LABEL: srl_maybe_zero:
1057 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1058 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1059 ; X86-NEXT: shrl %cl, %eax
1060 ; X86-NEXT: testl %eax, %eax
1061 ; X86-NEXT: je .LBB35_1
1062 ; X86-NEXT: # %bb.2: # %cond.false
1063 ; X86-NEXT: rep bsfl %eax, %eax
1065 ; X86-NEXT: .LBB35_1:
1066 ; X86-NEXT: movl $32, %eax
1069 ; X64-LABEL: srl_maybe_zero:
1071 ; X64-NEXT: movl %edi, %ecx
1072 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
1073 ; X64-NEXT: shrl %cl, %esi
1074 ; X64-NEXT: testl %esi, %esi
1075 ; X64-NEXT: je .LBB35_1
1076 ; X64-NEXT: # %bb.2: # %cond.false
1077 ; X64-NEXT: rep bsfl %esi, %eax
1079 ; X64-NEXT: .LBB35_1:
1080 ; X64-NEXT: movl $32, %eax
1082 %z = lshr exact i32 %y, %x
1083 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of `udiv exact %x, %y`. The checks show the dividend being OR'ed
;; with 64 first; an exact division of a non-zero dividend cannot yield
;; zero, so the expected output has no zero guard after `divl`.
1087 define i32 @udiv_known_nonzero(i32 %xx, i32 %y) {
1088 ; X86-LABEL: udiv_known_nonzero:
1090 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1091 ; X86-NEXT: orl $64, %eax
1092 ; X86-NEXT: xorl %edx, %edx
1093 ; X86-NEXT: divl {{[0-9]+}}(%esp)
1094 ; X86-NEXT: rep bsfl %eax, %eax
1097 ; X64-LABEL: udiv_known_nonzero:
1099 ; X64-NEXT: movl %edi, %eax
1100 ; X64-NEXT: orl $64, %eax
1101 ; X64-NEXT: xorl %edx, %edx
1102 ; X64-NEXT: divl %esi
1103 ; X64-NEXT: rep bsfl %eax, %eax
1106 %z = udiv exact i32 %x, %y
1107 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of `udiv exact %x, %y` with an unconstrained dividend (0 / y is
;; zero). Expected output keeps the testl/je guard and the constant-32
;; fallback after `divl`.
1111 define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
1112 ; X86-LABEL: udiv_maybe_zero:
1114 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1115 ; X86-NEXT: xorl %edx, %edx
1116 ; X86-NEXT: divl {{[0-9]+}}(%esp)
1117 ; X86-NEXT: testl %eax, %eax
1118 ; X86-NEXT: je .LBB37_1
1119 ; X86-NEXT: # %bb.2: # %cond.false
1120 ; X86-NEXT: rep bsfl %eax, %eax
1122 ; X86-NEXT: .LBB37_1:
1123 ; X86-NEXT: movl $32, %eax
1126 ; X64-LABEL: udiv_maybe_zero:
1128 ; X64-NEXT: movl %edi, %eax
1129 ; X64-NEXT: xorl %edx, %edx
1130 ; X64-NEXT: divl %esi
1131 ; X64-NEXT: testl %eax, %eax
1132 ; X64-NEXT: je .LBB37_1
1133 ; X64-NEXT: # %bb.2: # %cond.false
1134 ; X64-NEXT: rep bsfl %eax, %eax
1136 ; X64-NEXT: .LBB37_1:
1137 ; X64-NEXT: movl $32, %eax
1139 %z = udiv exact i32 %x, %y
1140 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of `sdiv exact %x, %y`. The checks show the dividend being OR'ed
;; with 64 first; an exact division of a non-zero dividend cannot yield
;; zero, so the expected output has no zero guard after `idivl`.
1144 define i32 @sdiv_known_nonzero(i32 %xx, i32 %y) {
1145 ; X86-LABEL: sdiv_known_nonzero:
1147 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1148 ; X86-NEXT: orl $64, %eax
1150 ; X86-NEXT: idivl {{[0-9]+}}(%esp)
1151 ; X86-NEXT: rep bsfl %eax, %eax
1154 ; X64-LABEL: sdiv_known_nonzero:
1156 ; X64-NEXT: movl %edi, %eax
1157 ; X64-NEXT: orl $64, %eax
1159 ; X64-NEXT: idivl %esi
1160 ; X64-NEXT: rep bsfl %eax, %eax
1163 %z = sdiv exact i32 %x, %y
1164 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of `sdiv exact %x, %y` with an unconstrained dividend (0 / y is
;; zero). Expected output keeps the testl/je guard and the constant-32
;; fallback after `idivl`.
1168 define i32 @sdiv_maybe_zero(i32 %x, i32 %y) {
1169 ; X86-LABEL: sdiv_maybe_zero:
1171 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1173 ; X86-NEXT: idivl {{[0-9]+}}(%esp)
1174 ; X86-NEXT: testl %eax, %eax
1175 ; X86-NEXT: je .LBB39_1
1176 ; X86-NEXT: # %bb.2: # %cond.false
1177 ; X86-NEXT: rep bsfl %eax, %eax
1179 ; X86-NEXT: .LBB39_1:
1180 ; X86-NEXT: movl $32, %eax
1183 ; X64-LABEL: sdiv_maybe_zero:
1185 ; X64-NEXT: movl %edi, %eax
1187 ; X64-NEXT: idivl %esi
1188 ; X64-NEXT: testl %eax, %eax
1189 ; X64-NEXT: je .LBB39_1
1190 ; X64-NEXT: # %bb.2: # %cond.false
1191 ; X64-NEXT: rep bsfl %eax, %eax
1193 ; X64-NEXT: .LBB39_1:
1194 ; X64-NEXT: movl $32, %eax
1196 %z = sdiv exact i32 %x, %y
1197 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of `add nuw %x, %y`. The checks show one addend being OR'ed with 1
;; first; with no unsigned wrap the sum of a non-zero value cannot be
;; zero, so the expected output has no zero guard before `rep bsfl`.
1201 define i32 @add_known_nonzero(i32 %xx, i32 %y) {
1202 ; X86-LABEL: add_known_nonzero:
1204 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1205 ; X86-NEXT: orl $1, %eax
1206 ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
1207 ; X86-NEXT: rep bsfl %eax, %eax
1210 ; X64-LABEL: add_known_nonzero:
1212 ; X64-NEXT: orl $1, %edi
1213 ; X64-NEXT: addl %esi, %edi
1214 ; X64-NEXT: rep bsfl %edi, %eax
1217 %z = add nuw i32 %x, %y
1218 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; cttz of `add nsw %x, %y`. One addend is OR'ed with 1 (per the checks),
;; but nsw does not stop a positive and a negative value from summing to
;; zero. Expected output branches on the add's zero flag and keeps the
;; constant-32 fallback.
1222 define i32 @add_maybe_zero(i32 %xx, i32 %y) {
1223 ; X86-LABEL: add_maybe_zero:
1225 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1226 ; X86-NEXT: orl $1, %eax
1227 ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
1228 ; X86-NEXT: je .LBB41_1
1229 ; X86-NEXT: # %bb.2: # %cond.false
1230 ; X86-NEXT: rep bsfl %eax, %eax
1232 ; X86-NEXT: .LBB41_1:
1233 ; X86-NEXT: movl $32, %eax
1236 ; X64-LABEL: add_maybe_zero:
1238 ; X64-NEXT: orl $1, %edi
1239 ; X64-NEXT: addl %esi, %edi
1240 ; X64-NEXT: je .LBB41_1
1241 ; X64-NEXT: # %bb.2: # %cond.false
1242 ; X64-NEXT: rep bsfl %edi, %eax
1244 ; X64-NEXT: .LBB41_1:
1245 ; X64-NEXT: movl $32, %eax
1248 %z = add nsw i32 %x, %y
1249 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; Negation case: %x is 256 shifted left with nuw nsw, and the checks show it
;; being negated (negl) before the cttz. The expected output is a bare
;; `rep bsfl` with no zero-test branch.
1253 define i32 @sub_known_nonzero_neg_case(i32 %xx) {
1254 ; X86-LABEL: sub_known_nonzero_neg_case:
1256 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1257 ; X86-NEXT: movl $256, %eax # imm = 0x100
1258 ; X86-NEXT: shll %cl, %eax
1259 ; X86-NEXT: negl %eax
1260 ; X86-NEXT: rep bsfl %eax, %eax
1263 ; X64-LABEL: sub_known_nonzero_neg_case:
1265 ; X64-NEXT: movl %edi, %ecx
1266 ; X64-NEXT: movl $256, %eax # imm = 0x100
1267 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
1268 ; X64-NEXT: shll %cl, %eax
1269 ; X64-NEXT: negl %eax
1270 ; X64-NEXT: rep bsfl %eax, %eax
1272 %x = shl nuw nsw i32 256, %xx
1274 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; Operands-differ case: per the checks, one subl operand has bit 6 set
;; (`orl $64`) and the other has it cleared (`andl $-65`), so the two values
;; can never be equal. The expected output is a bare `rep bsfl` with no
;; zero-test branch.
1278 define i32 @sub_known_nonzero_ne_case(i32 %xx, i32 %yy) {
1279 ; X86-LABEL: sub_known_nonzero_ne_case:
1281 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1282 ; X86-NEXT: movl %eax, %ecx
1283 ; X86-NEXT: orl $64, %ecx
1284 ; X86-NEXT: andl $-65, %eax
1285 ; X86-NEXT: subl %ecx, %eax
1286 ; X86-NEXT: rep bsfl %eax, %eax
1289 ; X64-LABEL: sub_known_nonzero_ne_case:
1291 ; X64-NEXT: movl %edi, %eax
1292 ; X64-NEXT: orl $64, %eax
1293 ; X64-NEXT: andl $-65, %edi
1294 ; X64-NEXT: subl %eax, %edi
1295 ; X64-NEXT: rep bsfl %edi, %eax
1298 %y = and i32 %xx, -65
1300 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; Negative test: the checks compute (x | 64) - x, which is zero whenever x
;; already has bit 6 set. The expected output keeps a je on the subl flags
;; that falls back to the constant 32.
1304 define i32 @sub_maybe_zero(i32 %x) {
1305 ; X86-LABEL: sub_maybe_zero:
1307 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1308 ; X86-NEXT: movl %ecx, %eax
1309 ; X86-NEXT: orl $64, %eax
1310 ; X86-NEXT: subl %ecx, %eax
1311 ; X86-NEXT: je .LBB44_1
1312 ; X86-NEXT: # %bb.2: # %cond.false
1313 ; X86-NEXT: rep bsfl %eax, %eax
1315 ; X86-NEXT: .LBB44_1:
1316 ; X86-NEXT: movl $32, %eax
1319 ; X64-LABEL: sub_maybe_zero:
1321 ; X64-NEXT: movl %edi, %eax
1322 ; X64-NEXT: orl $64, %eax
1323 ; X64-NEXT: subl %edi, %eax
1324 ; X64-NEXT: je .LBB44_1
1325 ; X64-NEXT: # %bb.2: # %cond.false
1326 ; X64-NEXT: rep bsfl %eax, %eax
1328 ; X64-NEXT: .LBB44_1:
1329 ; X64-NEXT: movl $32, %eax
1333 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; Negative test: the checks show a plain negl of the unconstrained argument,
;; which is zero when the argument is zero. The expected output keeps a je on
;; the negl flags that falls back to the constant 32.
1337 define i32 @sub_maybe_zero2(i32 %x) {
1338 ; X86-LABEL: sub_maybe_zero2:
1340 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1341 ; X86-NEXT: negl %eax
1342 ; X86-NEXT: je .LBB45_1
1343 ; X86-NEXT: # %bb.2: # %cond.false
1344 ; X86-NEXT: rep bsfl %eax, %eax
1346 ; X86-NEXT: .LBB45_1:
1347 ; X86-NEXT: movl $32, %eax
1350 ; X64-LABEL: sub_maybe_zero2:
1352 ; X64-NEXT: negl %edi
1353 ; X64-NEXT: je .LBB45_1
1354 ; X64-NEXT: # %bb.2: # %cond.false
1355 ; X64-NEXT: rep bsfl %edi, %eax
1357 ; X64-NEXT: .LBB45_1:
1358 ; X64-NEXT: movl $32, %eax
1361 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; mul nsw of %y (= %yy | 256) and the argument %x, feeding cttz
;; (is_zero_poison = false).
;; NOTE(review): despite the "known_nonzero" name, only %y has a bit forced
;; on; %x is an unconstrained argument, so the product can be zero. The checks
;; accordingly still expect a testl/je guard with a fallback to 32. Confirm
;; whether %x was meant to be made non-zero as well.
1365 define i32 @mul_known_nonzero_nsw(i32 %x, i32 %yy) {
1366 ; X86-LABEL: mul_known_nonzero_nsw:
1368 ; X86-NEXT: movl $256, %eax # imm = 0x100
1369 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
1370 ; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
1371 ; X86-NEXT: testl %eax, %eax
1372 ; X86-NEXT: je .LBB46_1
1373 ; X86-NEXT: # %bb.2: # %cond.false
1374 ; X86-NEXT: rep bsfl %eax, %eax
1376 ; X86-NEXT: .LBB46_1:
1377 ; X86-NEXT: movl $32, %eax
1380 ; X64-LABEL: mul_known_nonzero_nsw:
1382 ; X64-NEXT: orl $256, %esi # imm = 0x100
1383 ; X64-NEXT: imull %edi, %esi
1384 ; X64-NEXT: testl %esi, %esi
1385 ; X64-NEXT: je .LBB46_1
1386 ; X64-NEXT: # %bb.2: # %cond.false
1387 ; X64-NEXT: rep bsfl %esi, %eax
1389 ; X64-NEXT: .LBB46_1:
1390 ; X64-NEXT: movl $32, %eax
1392 %y = or i32 %yy, 256
1393 %z = mul nsw i32 %y, %x
1394 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; mul nuw of %y (= %yy | 256) and the argument %x, feeding cttz
;; (is_zero_poison = false).
;; NOTE(review): despite the "known_nonzero" name, only %y has a bit forced
;; on; %x is an unconstrained argument, so the product can be zero. The checks
;; accordingly still expect a testl/je guard with a fallback to 32. Confirm
;; whether %x was meant to be made non-zero as well.
1398 define i32 @mul_known_nonzero_nuw(i32 %x, i32 %yy) {
1399 ; X86-LABEL: mul_known_nonzero_nuw:
1401 ; X86-NEXT: movl $256, %eax # imm = 0x100
1402 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
1403 ; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
1404 ; X86-NEXT: testl %eax, %eax
1405 ; X86-NEXT: je .LBB47_1
1406 ; X86-NEXT: # %bb.2: # %cond.false
1407 ; X86-NEXT: rep bsfl %eax, %eax
1409 ; X86-NEXT: .LBB47_1:
1410 ; X86-NEXT: movl $32, %eax
1413 ; X64-LABEL: mul_known_nonzero_nuw:
1415 ; X64-NEXT: orl $256, %esi # imm = 0x100
1416 ; X64-NEXT: imull %edi, %esi
1417 ; X64-NEXT: testl %esi, %esi
1418 ; X64-NEXT: je .LBB47_1
1419 ; X64-NEXT: # %bb.2: # %cond.false
1420 ; X64-NEXT: rep bsfl %esi, %eax
1422 ; X64-NEXT: .LBB47_1:
1423 ; X64-NEXT: movl $32, %eax
1425 %y = or i32 %yy, 256
1426 %z = mul nuw i32 %y, %x
1427 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; Negative test: mul nuw nsw of two unconstrained arguments feeding cttz
;; (is_zero_poison = false). Either factor may be zero, so the checks expect a
;; testl/je guard that falls back to the constant 32.
1431 define i32 @mul_maybe_zero(i32 %x, i32 %y) {
1432 ; X86-LABEL: mul_maybe_zero:
1434 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1435 ; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
1436 ; X86-NEXT: testl %eax, %eax
1437 ; X86-NEXT: je .LBB48_1
1438 ; X86-NEXT: # %bb.2: # %cond.false
1439 ; X86-NEXT: rep bsfl %eax, %eax
1441 ; X86-NEXT: .LBB48_1:
1442 ; X86-NEXT: movl $32, %eax
1445 ; X64-LABEL: mul_maybe_zero:
1447 ; X64-NEXT: imull %esi, %edi
1448 ; X64-NEXT: testl %edi, %edi
1449 ; X64-NEXT: je .LBB48_1
1450 ; X64-NEXT: # %bb.2: # %cond.false
1451 ; X64-NEXT: rep bsfl %edi, %eax
1453 ; X64-NEXT: .LBB48_1:
1454 ; X64-NEXT: movl $32, %eax
1456 %z = mul nuw nsw i32 %y, %x
1457 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; <2 x i16> splat of 256 shifted left with nuw nsw, bitcast to i32, then cttz
;; (is_zero_poison = false).
;; NOTE(review): the expected output still handles a zero input (plain `bsfl`
;; followed by `movl $32` / `cmovnel`), so the non-zero-ness of the vector is
;; not exploited through the bitcast in the current checks.
1461 define i32 @bitcast_known_nonzero(<2 x i16> %xx) {
1462 ; X86-LABEL: bitcast_known_nonzero:
1464 ; X86-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1465 ; X86-NEXT: pslld $23, %xmm0
1466 ; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1467 ; X86-NEXT: cvttps2dq %xmm0, %xmm0
1468 ; X86-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1469 ; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [256,256,u,u,u,u,u,u]
1470 ; X86-NEXT: movd %xmm0, %eax
1471 ; X86-NEXT: bsfl %eax, %ecx
1472 ; X86-NEXT: movl $32, %eax
1473 ; X86-NEXT: cmovnel %ecx, %eax
1476 ; X64-LABEL: bitcast_known_nonzero:
1478 ; X64-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1479 ; X64-NEXT: vpslld $23, %xmm0, %xmm0
1480 ; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1481 ; X64-NEXT: vcvttps2dq %xmm0, %xmm0
1482 ; X64-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
1483 ; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,256,u,u,u,u,u,u]
1484 ; X64-NEXT: vmovd %xmm0, %eax
1485 ; X64-NEXT: bsfl %eax, %ecx
1486 ; X64-NEXT: movl $32, %eax
1487 ; X64-NEXT: cmovnel %ecx, %eax
1489 %x = shl nuw nsw <2 x i16> <i16 256, i16 256>, %xx
1490 %z = bitcast <2 x i16> %x to i32
1491 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; Negative test: an unconstrained <2 x i16> argument is bitcast to i32 and
;; fed to cttz (is_zero_poison = false). Nothing is known about the value, so
;; the checks expect a testl/je guard that falls back to the constant 32.
1495 define i32 @bitcast_maybe_zero(<2 x i16> %x) {
1496 ; X86-LABEL: bitcast_maybe_zero:
1498 ; X86-NEXT: movd %xmm0, %eax
1499 ; X86-NEXT: testl %eax, %eax
1500 ; X86-NEXT: je .LBB50_1
1501 ; X86-NEXT: # %bb.2: # %cond.false
1502 ; X86-NEXT: rep bsfl %eax, %eax
1504 ; X86-NEXT: .LBB50_1:
1505 ; X86-NEXT: movl $32, %eax
1508 ; X64-LABEL: bitcast_maybe_zero:
1510 ; X64-NEXT: vmovd %xmm0, %eax
1511 ; X64-NEXT: testl %eax, %eax
1512 ; X64-NEXT: je .LBB50_1
1513 ; X64-NEXT: # %bb.2: # %cond.false
1514 ; X64-NEXT: rep bsfl %eax, %eax
1516 ; X64-NEXT: .LBB50_1:
1517 ; X64-NEXT: movl $32, %eax
1519 %z = bitcast <2 x i16> %x to i32
1520 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; Negative test: an unconstrained float argument is bitcast to i32 and fed to
;; cttz (is_zero_poison = false). The bit pattern may be all zeros, so the
;; checks expect a testl/je guard that falls back to the constant 32.
1524 define i32 @bitcast_from_float(float %x) {
1525 ; X86-LABEL: bitcast_from_float:
1527 ; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1528 ; X86-NEXT: movd %xmm0, %eax
1529 ; X86-NEXT: testl %eax, %eax
1530 ; X86-NEXT: je .LBB51_1
1531 ; X86-NEXT: # %bb.2: # %cond.false
1532 ; X86-NEXT: rep bsfl %eax, %eax
1534 ; X86-NEXT: .LBB51_1:
1535 ; X86-NEXT: movl $32, %eax
1538 ; X64-LABEL: bitcast_from_float:
1540 ; X64-NEXT: vmovd %xmm0, %eax
1541 ; X64-NEXT: testl %eax, %eax
1542 ; X64-NEXT: je .LBB51_1
1543 ; X64-NEXT: # %bb.2: # %cond.false
1544 ; X64-NEXT: rep bsfl %eax, %eax
1546 ; X64-NEXT: .LBB51_1:
1547 ; X64-NEXT: movl $32, %eax
1549 %z = bitcast float %x to i32
1550 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; zext of an i16 that is 256 shifted left with nuw nsw, feeding cttz
;; (is_zero_poison = false). The expected output is shll, movzwl and a bare
;; `rep bsfl` with no zero-test branch.
1554 define i32 @zext_known_nonzero(i16 %xx) {
1555 ; X86-LABEL: zext_known_nonzero:
1557 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1558 ; X86-NEXT: movl $256, %eax # imm = 0x100
1559 ; X86-NEXT: shll %cl, %eax
1560 ; X86-NEXT: movzwl %ax, %eax
1561 ; X86-NEXT: rep bsfl %eax, %eax
1564 ; X64-LABEL: zext_known_nonzero:
1566 ; X64-NEXT: movl %edi, %ecx
1567 ; X64-NEXT: movl $256, %eax # imm = 0x100
1568 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
1569 ; X64-NEXT: shll %cl, %eax
1570 ; X64-NEXT: movzwl %ax, %eax
1571 ; X64-NEXT: rep bsfl %eax, %eax
1573 %x = shl nuw nsw i16 256, %xx
1574 %z = zext i16 %x to i32
1575 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; Negative test: zext of an unconstrained i16 argument feeding cttz
;; (is_zero_poison = false). The checks expect a 16-bit zero test (testw/je)
;; ahead of the movzwl + `rep bsfl`, with a fallback to the constant 32.
1579 define i32 @zext_maybe_zero(i16 %x) {
1580 ; X86-LABEL: zext_maybe_zero:
1582 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
1583 ; X86-NEXT: testw %ax, %ax
1584 ; X86-NEXT: je .LBB53_1
1585 ; X86-NEXT: # %bb.2: # %cond.false
1586 ; X86-NEXT: movzwl %ax, %eax
1587 ; X86-NEXT: rep bsfl %eax, %eax
1589 ; X86-NEXT: .LBB53_1:
1590 ; X86-NEXT: movl $32, %eax
1593 ; X64-LABEL: zext_maybe_zero:
1595 ; X64-NEXT: testw %di, %di
1596 ; X64-NEXT: je .LBB53_1
1597 ; X64-NEXT: # %bb.2: # %cond.false
1598 ; X64-NEXT: movzwl %di, %eax
1599 ; X64-NEXT: rep bsfl %eax, %eax
1601 ; X64-NEXT: .LBB53_1:
1602 ; X64-NEXT: movl $32, %eax
1604 %z = zext i16 %x to i32
1605 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; sext of an i16 that is 256 shifted left with nuw nsw, feeding cttz
;; (is_zero_poison = false). The checks expect the extension to be emitted as
;; a zero-extending movzwl, followed by a bare `rep bsfl` with no zero-test
;; branch.
1609 define i32 @sext_known_nonzero(i16 %xx) {
1610 ; X86-LABEL: sext_known_nonzero:
1612 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1613 ; X86-NEXT: movl $256, %eax # imm = 0x100
1614 ; X86-NEXT: shll %cl, %eax
1615 ; X86-NEXT: movzwl %ax, %eax
1616 ; X86-NEXT: rep bsfl %eax, %eax
1619 ; X64-LABEL: sext_known_nonzero:
1621 ; X64-NEXT: movl %edi, %ecx
1622 ; X64-NEXT: movl $256, %eax # imm = 0x100
1623 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
1624 ; X64-NEXT: shll %cl, %eax
1625 ; X64-NEXT: movzwl %ax, %eax
1626 ; X64-NEXT: rep bsfl %eax, %eax
1628 %x = shl nuw nsw i16 256, %xx
1629 %z = sext i16 %x to i32
1630 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
;; Negative test: sext of an unconstrained i16 argument feeding cttz
;; (is_zero_poison = false). The checks expect a zero test before the
;; `rep bsfl` (testl after movswl on i686, testw before movswl on x86_64) with
;; a fallback to the constant 32.
1634 define i32 @sext_maybe_zero(i16 %x) {
1635 ; X86-LABEL: sext_maybe_zero:
1637 ; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
1638 ; X86-NEXT: testl %eax, %eax
1639 ; X86-NEXT: je .LBB55_1
1640 ; X86-NEXT: # %bb.2: # %cond.false
1641 ; X86-NEXT: rep bsfl %eax, %eax
1643 ; X86-NEXT: .LBB55_1:
1644 ; X86-NEXT: movl $32, %eax
1647 ; X64-LABEL: sext_maybe_zero:
1649 ; X64-NEXT: testw %di, %di
1650 ; X64-NEXT: je .LBB55_1
1651 ; X64-NEXT: # %bb.2: # %cond.false
1652 ; X64-NEXT: movswl %di, %eax
1653 ; X64-NEXT: rep bsfl %eax, %eax
1655 ; X64-NEXT: .LBB55_1:
1656 ; X64-NEXT: movl $32, %eax
1658 %z = sext i16 %x to i32
1659 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)