1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; Test patterns which generate lzcnt instructions.
3 ; E.g.: zext(or(setcc(cmp), setcc(cmp))) -> shr(or(lzcnt, lzcnt))
4 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
5 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
6 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
7 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
9 ; Test one 32-bit input, output is 32-bit, no transformations expected.
; Only a single icmp feeds the zext here (there is no `or` of two compares),
; and the checks below use the prefix shared by every RUN configuration, so
; the same output is expected with and without fast-lzcnt.
10 define i32 @test_zext_cmp0(i32 %a) {
11 ; ALL-LABEL: test_zext_cmp0:
12 ; ALL: # %bb.0: # %entry
13 ; ALL-NEXT: xorl %eax, %eax
14 ; ALL-NEXT: testl %edi, %edi
; i1 result of the compare against zero, widened to i32.
18 %cmp = icmp eq i32 %a, 0
19 %conv = zext i1 %cmp to i32
23 ; Test two 32-bit inputs, output is 32-bit.
; IR shape: zext(or(icmp eq %a, 0; icmp eq %b, 0)) to i32.
; Expected under the FASTLZCNT prefix: one lzcntl per input, an orl of the two
; counts, then a single shrl by 5 (log2 of the 32-bit operand width).
; Expected under the NOFASTLZCNT prefix: the testl/sete pair per input, an orb
; of the two flags bytes, then movzbl.
24 define i32 @test_zext_cmp1(i32 %a, i32 %b) {
25 ; FASTLZCNT-LABEL: test_zext_cmp1:
27 ; FASTLZCNT-NEXT: lzcntl %edi, %ecx
28 ; FASTLZCNT-NEXT: lzcntl %esi, %eax
29 ; FASTLZCNT-NEXT: orl %ecx, %eax
30 ; FASTLZCNT-NEXT: shrl $5, %eax
31 ; FASTLZCNT-NEXT: retq
33 ; NOFASTLZCNT-LABEL: test_zext_cmp1:
34 ; NOFASTLZCNT: # %bb.0:
35 ; NOFASTLZCNT-NEXT: testl %edi, %edi
36 ; NOFASTLZCNT-NEXT: sete %al
37 ; NOFASTLZCNT-NEXT: testl %esi, %esi
38 ; NOFASTLZCNT-NEXT: sete %cl
39 ; NOFASTLZCNT-NEXT: orb %al, %cl
40 ; NOFASTLZCNT-NEXT: movzbl %cl, %eax
41 ; NOFASTLZCNT-NEXT: retq
42 %cmp = icmp eq i32 %a, 0
43 %cmp1 = icmp eq i32 %b, 0
44 %or = or i1 %cmp, %cmp1
45 %lor.ext = zext i1 %or to i32
49 ; Test two 64-bit inputs, output is 64-bit.
; Same IR shape as the two-input 32-bit case, but on i64 operands.
; Expected under the FASTLZCNT prefix: lzcntq per input, then the or and the
; shift are done on the 32-bit registers (orl, shrl), with a shift amount of 6
; (log2 of the 64-bit operand width).
; Expected under the NOFASTLZCNT prefix: testq/sete per input, orb, movzbl.
50 define i64 @test_zext_cmp2(i64 %a, i64 %b) {
51 ; FASTLZCNT-LABEL: test_zext_cmp2:
53 ; FASTLZCNT-NEXT: lzcntq %rdi, %rcx
54 ; FASTLZCNT-NEXT: lzcntq %rsi, %rax
55 ; FASTLZCNT-NEXT: orl %ecx, %eax
56 ; FASTLZCNT-NEXT: shrl $6, %eax
57 ; FASTLZCNT-NEXT: retq
59 ; NOFASTLZCNT-LABEL: test_zext_cmp2:
60 ; NOFASTLZCNT: # %bb.0:
61 ; NOFASTLZCNT-NEXT: testq %rdi, %rdi
62 ; NOFASTLZCNT-NEXT: sete %al
63 ; NOFASTLZCNT-NEXT: testq %rsi, %rsi
64 ; NOFASTLZCNT-NEXT: sete %cl
65 ; NOFASTLZCNT-NEXT: orb %al, %cl
66 ; NOFASTLZCNT-NEXT: movzbl %cl, %eax
67 ; NOFASTLZCNT-NEXT: retq
68 %cmp = icmp eq i64 %a, 0
69 %cmp1 = icmp eq i64 %b, 0
70 %or = or i1 %cmp, %cmp1
71 %lor.ext = zext i1 %or to i64
75 ; Test two 16-bit inputs, output is 16-bit.
76 ; The transform is disabled for the 16-bit case, as we still have to clear the
77 ; upper 16 bits, adding one more instruction.
; Accordingly the checks below use the prefix shared by every RUN
; configuration, and the visible ones show testw/orb/movzbl with no lzcnt.
78 define i16 @test_zext_cmp3(i16 %a, i16 %b) {
79 ; ALL-LABEL: test_zext_cmp3:
81 ; ALL-NEXT: testw %di, %di
83 ; ALL-NEXT: testw %si, %si
85 ; ALL-NEXT: orb %al, %cl
86 ; ALL-NEXT: movzbl %cl, %eax
87 ; ALL-NEXT: # kill: def $ax killed $ax killed $eax
89 %cmp = icmp eq i16 %a, 0
90 %cmp1 = icmp eq i16 %b, 0
91 %or = or i1 %cmp, %cmp1
92 %lor.ext = zext i1 %or to i16
96 ; Test two 32-bit inputs, output is 64-bit.
; The compares are on i32 but the or'ed i1 is zero-extended to i64.
; Expected under the FASTLZCNT prefix: lzcntl per input, orl, shrl by 5; no
; separate widening instruction appears in the checks.
; Expected under the NOFASTLZCNT prefix: testl/sete per input, orb, movzbl.
97 define i64 @test_zext_cmp4(i32 %a, i32 %b) {
98 ; FASTLZCNT-LABEL: test_zext_cmp4:
99 ; FASTLZCNT: # %bb.0: # %entry
100 ; FASTLZCNT-NEXT: lzcntl %edi, %ecx
101 ; FASTLZCNT-NEXT: lzcntl %esi, %eax
102 ; FASTLZCNT-NEXT: orl %ecx, %eax
103 ; FASTLZCNT-NEXT: shrl $5, %eax
104 ; FASTLZCNT-NEXT: retq
106 ; NOFASTLZCNT-LABEL: test_zext_cmp4:
107 ; NOFASTLZCNT: # %bb.0: # %entry
108 ; NOFASTLZCNT-NEXT: testl %edi, %edi
109 ; NOFASTLZCNT-NEXT: sete %al
110 ; NOFASTLZCNT-NEXT: testl %esi, %esi
111 ; NOFASTLZCNT-NEXT: sete %cl
112 ; NOFASTLZCNT-NEXT: orb %al, %cl
113 ; NOFASTLZCNT-NEXT: movzbl %cl, %eax
114 ; NOFASTLZCNT-NEXT: retq
116 %cmp = icmp eq i32 %a, 0
117 %cmp1 = icmp eq i32 %b, 0
118 %0 = or i1 %cmp, %cmp1
119 %conv = zext i1 %0 to i64
123 ; Test two 64-bit inputs, output is 32-bit.
; The compares are on i64 but the or'ed i1 is zero-extended to only i32.
; Expected under the FASTLZCNT prefix: lzcntq per input, orl, shrl by 6,
; followed by a `# kill` annotation that narrows $rax to $eax.
; Expected under the NOFASTLZCNT prefix: testq/sete per input, orb, movzbl.
124 define i32 @test_zext_cmp5(i64 %a, i64 %b) {
125 ; FASTLZCNT-LABEL: test_zext_cmp5:
126 ; FASTLZCNT: # %bb.0: # %entry
127 ; FASTLZCNT-NEXT: lzcntq %rdi, %rcx
128 ; FASTLZCNT-NEXT: lzcntq %rsi, %rax
129 ; FASTLZCNT-NEXT: orl %ecx, %eax
130 ; FASTLZCNT-NEXT: shrl $6, %eax
131 ; FASTLZCNT-NEXT: # kill: def $eax killed $eax killed $rax
132 ; FASTLZCNT-NEXT: retq
134 ; NOFASTLZCNT-LABEL: test_zext_cmp5:
135 ; NOFASTLZCNT: # %bb.0: # %entry
136 ; NOFASTLZCNT-NEXT: testq %rdi, %rdi
137 ; NOFASTLZCNT-NEXT: sete %al
138 ; NOFASTLZCNT-NEXT: testq %rsi, %rsi
139 ; NOFASTLZCNT-NEXT: sete %cl
140 ; NOFASTLZCNT-NEXT: orb %al, %cl
141 ; NOFASTLZCNT-NEXT: movzbl %cl, %eax
142 ; NOFASTLZCNT-NEXT: retq
144 %cmp = icmp eq i64 %a, 0
145 %cmp1 = icmp eq i64 %b, 0
146 %0 = or i1 %cmp, %cmp1
147 %lor.ext = zext i1 %0 to i32
151 ; Test three 32-bit inputs, output is 32-bit.
; IR shape: zext(or(or(cmp a, cmp b), cmp c)) to i32, i.e. a chain of two ors.
; Expected under the FASTLZCNT prefix: three lzcntl, two orl, and one final
; shrl by 5 applied to the combined value.
; Expected under the NOFASTLZCNT prefix: three testl/sete pairs, two orb, and
; a final movzbl.
152 define i32 @test_zext_cmp6(i32 %a, i32 %b, i32 %c) {
153 ; FASTLZCNT-LABEL: test_zext_cmp6:
154 ; FASTLZCNT: # %bb.0: # %entry
155 ; FASTLZCNT-NEXT: lzcntl %edi, %eax
156 ; FASTLZCNT-NEXT: lzcntl %esi, %ecx
157 ; FASTLZCNT-NEXT: orl %eax, %ecx
158 ; FASTLZCNT-NEXT: lzcntl %edx, %eax
159 ; FASTLZCNT-NEXT: orl %ecx, %eax
160 ; FASTLZCNT-NEXT: shrl $5, %eax
161 ; FASTLZCNT-NEXT: retq
163 ; NOFASTLZCNT-LABEL: test_zext_cmp6:
164 ; NOFASTLZCNT: # %bb.0: # %entry
165 ; NOFASTLZCNT-NEXT: testl %edi, %edi
166 ; NOFASTLZCNT-NEXT: sete %al
167 ; NOFASTLZCNT-NEXT: testl %esi, %esi
168 ; NOFASTLZCNT-NEXT: sete %cl
169 ; NOFASTLZCNT-NEXT: orb %al, %cl
170 ; NOFASTLZCNT-NEXT: testl %edx, %edx
171 ; NOFASTLZCNT-NEXT: sete %al
172 ; NOFASTLZCNT-NEXT: orb %cl, %al
173 ; NOFASTLZCNT-NEXT: movzbl %al, %eax
174 ; NOFASTLZCNT-NEXT: retq
176 %cmp = icmp eq i32 %a, 0
177 %cmp1 = icmp eq i32 %b, 0
178 %or.cond = or i1 %cmp, %cmp1
179 %cmp2 = icmp eq i32 %c, 0
; The inner or (%or.cond) is the first operand of the outer or.
180 %.cmp2 = or i1 %or.cond, %cmp2
181 %lor.ext = zext i1 %.cmp2 to i32
185 ; Test three 32-bit inputs, output is 32-bit; compared to test_zext_cmp6,
186 ; the operand order of %.cmp2 is inverted.
; The checks below list three lzcntl, two orl and one shrl by 5 under the
; FASTLZCNT prefix, and three testl/sete pairs, two orb and a movzbl under the
; NOFASTLZCNT prefix.
187 define i32 @test_zext_cmp7(i32 %a, i32 %b, i32 %c) {
188 ; FASTLZCNT-LABEL: test_zext_cmp7:
189 ; FASTLZCNT: # %bb.0: # %entry
190 ; FASTLZCNT-NEXT: lzcntl %edi, %eax
191 ; FASTLZCNT-NEXT: lzcntl %esi, %ecx
192 ; FASTLZCNT-NEXT: orl %eax, %ecx
193 ; FASTLZCNT-NEXT: lzcntl %edx, %eax
194 ; FASTLZCNT-NEXT: orl %ecx, %eax
195 ; FASTLZCNT-NEXT: shrl $5, %eax
196 ; FASTLZCNT-NEXT: retq
198 ; NOFASTLZCNT-LABEL: test_zext_cmp7:
199 ; NOFASTLZCNT: # %bb.0: # %entry
200 ; NOFASTLZCNT-NEXT: testl %edi, %edi
201 ; NOFASTLZCNT-NEXT: sete %al
202 ; NOFASTLZCNT-NEXT: testl %esi, %esi
203 ; NOFASTLZCNT-NEXT: sete %cl
204 ; NOFASTLZCNT-NEXT: orb %al, %cl
205 ; NOFASTLZCNT-NEXT: testl %edx, %edx
206 ; NOFASTLZCNT-NEXT: sete %al
207 ; NOFASTLZCNT-NEXT: orb %cl, %al
208 ; NOFASTLZCNT-NEXT: movzbl %al, %eax
209 ; NOFASTLZCNT-NEXT: retq
211 %cmp = icmp eq i32 %a, 0
212 %cmp1 = icmp eq i32 %b, 0
213 %or.cond = or i1 %cmp, %cmp1
214 %cmp2 = icmp eq i32 %c, 0
; Here the lone compare (%cmp2) is the first operand and the inner or
; (%or.cond) is the second.
215 %.cmp2 = or i1 %cmp2, %or.cond
216 %lor.ext = zext i1 %.cmp2 to i32
220 ; Test four 32-bit inputs, output is 32-bit.
; The IR chains the ors left to right: ((cmp a | cmp b) | cmp c) | cmp d.
; Expected under the FASTLZCNT prefix: four lzcntl, three orl and one final
; shrl by 5. The expected orl sequence combines the counts pairwise
; (%eax into %esi, %edx into %eax) before the last orl merges the two halves.
; Expected under the NOFASTLZCNT prefix: four testl/sete pairs, three orb,
; then movzbl.
221 define i32 @test_zext_cmp8(i32 %a, i32 %b, i32 %c, i32 %d) {
222 ; FASTLZCNT-LABEL: test_zext_cmp8:
223 ; FASTLZCNT: # %bb.0: # %entry
224 ; FASTLZCNT-NEXT: lzcntl %edi, %eax
225 ; FASTLZCNT-NEXT: lzcntl %esi, %esi
226 ; FASTLZCNT-NEXT: lzcntl %edx, %edx
227 ; FASTLZCNT-NEXT: orl %eax, %esi
228 ; FASTLZCNT-NEXT: lzcntl %ecx, %eax
229 ; FASTLZCNT-NEXT: orl %edx, %eax
230 ; FASTLZCNT-NEXT: orl %esi, %eax
231 ; FASTLZCNT-NEXT: shrl $5, %eax
232 ; FASTLZCNT-NEXT: retq
234 ; NOFASTLZCNT-LABEL: test_zext_cmp8:
235 ; NOFASTLZCNT: # %bb.0: # %entry
236 ; NOFASTLZCNT-NEXT: testl %edi, %edi
237 ; NOFASTLZCNT-NEXT: sete %dil
238 ; NOFASTLZCNT-NEXT: testl %esi, %esi
239 ; NOFASTLZCNT-NEXT: sete %al
240 ; NOFASTLZCNT-NEXT: orb %dil, %al
241 ; NOFASTLZCNT-NEXT: testl %edx, %edx
242 ; NOFASTLZCNT-NEXT: sete %dl
243 ; NOFASTLZCNT-NEXT: testl %ecx, %ecx
244 ; NOFASTLZCNT-NEXT: sete %cl
245 ; NOFASTLZCNT-NEXT: orb %dl, %cl
246 ; NOFASTLZCNT-NEXT: orb %al, %cl
247 ; NOFASTLZCNT-NEXT: movzbl %cl, %eax
248 ; NOFASTLZCNT-NEXT: retq
250 %cmp = icmp eq i32 %a, 0
251 %cmp1 = icmp eq i32 %b, 0
252 %or.cond = or i1 %cmp, %cmp1
253 %cmp3 = icmp eq i32 %c, 0
254 %or.cond5 = or i1 %or.cond, %cmp3
255 %cmp4 = icmp eq i32 %d, 0
256 %.cmp4 = or i1 %or.cond5, %cmp4
257 %lor.ext = zext i1 %.cmp4 to i32
261 ; Test one 32-bit input, one 64-bit input, output is 32-bit.
; Because the two compares have different widths, the expected FASTLZCNT
; sequence shifts each count separately before the or: shrl by 5 for the
; lzcntl result and shrl by 6 for the lzcntq result, then orl.
; Expected under the NOFASTLZCNT prefix: testl/sete for %a, testq/sete for %b,
; orb, movzbl.
262 define i32 @test_zext_cmp9(i32 %a, i64 %b) {
263 ; FASTLZCNT-LABEL: test_zext_cmp9:
264 ; FASTLZCNT: # %bb.0: # %entry
265 ; FASTLZCNT-NEXT: lzcntq %rsi, %rax
266 ; FASTLZCNT-NEXT: lzcntl %edi, %ecx
267 ; FASTLZCNT-NEXT: shrl $5, %ecx
268 ; FASTLZCNT-NEXT: shrl $6, %eax
269 ; FASTLZCNT-NEXT: orl %ecx, %eax
270 ; FASTLZCNT-NEXT: # kill: def $eax killed $eax killed $rax
271 ; FASTLZCNT-NEXT: retq
273 ; NOFASTLZCNT-LABEL: test_zext_cmp9:
274 ; NOFASTLZCNT: # %bb.0: # %entry
275 ; NOFASTLZCNT-NEXT: testl %edi, %edi
276 ; NOFASTLZCNT-NEXT: sete %al
277 ; NOFASTLZCNT-NEXT: testq %rsi, %rsi
278 ; NOFASTLZCNT-NEXT: sete %cl
279 ; NOFASTLZCNT-NEXT: orb %al, %cl
280 ; NOFASTLZCNT-NEXT: movzbl %cl, %eax
281 ; NOFASTLZCNT-NEXT: retq
283 %cmp = icmp eq i32 %a, 0
284 %cmp1 = icmp eq i64 %b, 0
285 %0 = or i1 %cmp, %cmp1
286 %lor.ext = zext i1 %0 to i32
290 ; Test 2 128-bit inputs, output is 32-bit, no transformations expected.
; Each i128 value is passed as two i64 halves and reassembled in the body.
; The checks use the prefix shared by every RUN configuration; the visible
; ones show an orq of the two halves of each value followed by orb/movzbl,
; with no lzcnt.
291 define i32 @test_zext_cmp10(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) {
292 ; ALL-LABEL: test_zext_cmp10:
293 ; ALL: # %bb.0: # %entry
294 ; ALL-NEXT: orq %rsi, %rdi
296 ; ALL-NEXT: orq %rcx, %rdx
298 ; ALL-NEXT: orb %al, %cl
299 ; ALL-NEXT: movzbl %cl, %eax
; Build %a as i128: (zext %a.coerce1 << 64) | zext %a.coerce0.
302 %a.sroa.2.0.insert.ext = zext i64 %a.coerce1 to i128
303 %a.sroa.2.0.insert.shift = shl nuw i128 %a.sroa.2.0.insert.ext, 64
304 %a.sroa.0.0.insert.ext = zext i64 %a.coerce0 to i128
305 %a.sroa.0.0.insert.insert = or i128 %a.sroa.2.0.insert.shift, %a.sroa.0.0.insert.ext
; Build %b as i128 the same way from %b.coerce1 and %b.coerce0.
306 %b.sroa.2.0.insert.ext = zext i64 %b.coerce1 to i128
307 %b.sroa.2.0.insert.shift = shl nuw i128 %b.sroa.2.0.insert.ext, 64
308 %b.sroa.0.0.insert.ext = zext i64 %b.coerce0 to i128
309 %b.sroa.0.0.insert.insert = or i128 %b.sroa.2.0.insert.shift, %b.sroa.0.0.insert.ext
; Compare both 128-bit values against zero, or the results, widen to i32.
310 %cmp = icmp eq i128 %a.sroa.0.0.insert.insert, 0
311 %cmp3 = icmp eq i128 %b.sroa.0.0.insert.insert, 0
312 %0 = or i1 %cmp, %cmp3
313 %lor.ext = zext i1 %0 to i32
317 ; PR31902 Fix a crash in combineOrCmpEqZeroToCtlzSrl under fast math.
; The compares here are floating-point (`fcmp fast oeq` against 0.0) rather
; than integer, and the function carries the "no-nans-fp-math"="true"
; attribute. The checks use the prefix shared by every RUN configuration; the
; visible ones show vucomisd against a zeroed register followed by
; orb/movzbl, with no lzcnt.
318 define i32 @test_zext_cmp11(double %a, double %b) "no-nans-fp-math"="true" {
320 ; ALL-LABEL: test_zext_cmp11:
321 ; ALL: # %bb.0: # %entry
322 ; ALL-NEXT: vxorps %xmm2, %xmm2, %xmm2
323 ; ALL-NEXT: vucomisd %xmm2, %xmm0
325 ; ALL-NEXT: vucomisd %xmm2, %xmm1
327 ; ALL-NEXT: orb %al, %cl
328 ; ALL-NEXT: movzbl %cl, %eax
331 %cmp = fcmp fast oeq double %a, 0.000000e+00
332 %cmp1 = fcmp fast oeq double %b, 0.000000e+00
333 %0 = or i1 %cmp, %cmp1
334 %conv = zext i1 %0 to i32