1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; Test patterns which generate lzcnt instructions.
3 ; E.g.: zext(or(setcc(cmp), setcc(cmp))) -> shr(or(lzcnt, lzcnt))
4 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
5 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
6 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
7 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
8 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
9 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
11 ; Test one 32-bit input, output is 32-bit, no transformations expected.
; Only a single `icmp eq ..., 0` is zero-extended here; there is no `or` of two
; compares. The shared ALL checks expect an xorl/testl based sequence for every
; RUN configuration.
12 define i32 @test_zext_cmp0(i32 %a) {
13 ; ALL-LABEL: test_zext_cmp0:
14 ; ALL: # %bb.0: # %entry
15 ; ALL-NEXT: xorl %eax, %eax
16 ; ALL-NEXT: testl %edi, %edi
20 %cmp = icmp eq i32 %a, 0
21 %conv = zext i1 %cmp to i32
25 ; Test two 32-bit inputs, output is 32-bit.
; With fast-lzcnt, the expected code takes lzcntl of each input, ORs the two
; counts and shifts right by 5. lzcnt returns the operand width (32) only for a
; zero input, so bit 5 of the OR is set exactly when either input is zero.
; Without fast-lzcnt, the expected code is testl/sete per input, combined with
; orb and widened with movzbl.
26 define i32 @test_zext_cmp1(i32 %a, i32 %b) {
27 ; FASTLZCNT-LABEL: test_zext_cmp1:
29 ; FASTLZCNT-NEXT: lzcntl %edi, %ecx
30 ; FASTLZCNT-NEXT: lzcntl %esi, %eax
31 ; FASTLZCNT-NEXT: orl %ecx, %eax
32 ; FASTLZCNT-NEXT: shrl $5, %eax
33 ; FASTLZCNT-NEXT: retq
35 ; NOFASTLZCNT-LABEL: test_zext_cmp1:
36 ; NOFASTLZCNT: # %bb.0:
37 ; NOFASTLZCNT-NEXT: testl %edi, %edi
38 ; NOFASTLZCNT-NEXT: sete %al
39 ; NOFASTLZCNT-NEXT: testl %esi, %esi
40 ; NOFASTLZCNT-NEXT: sete %cl
41 ; NOFASTLZCNT-NEXT: orb %al, %cl
42 ; NOFASTLZCNT-NEXT: movzbl %cl, %eax
43 ; NOFASTLZCNT-NEXT: retq
44 %cmp = icmp eq i32 %a, 0
45 %cmp1 = icmp eq i32 %b, 0
46 %or = or i1 %cmp, %cmp1
47 %lor.ext = zext i1 %or to i32
51 ; Test two 64-bit inputs, output is 64-bit.
; With fast-lzcnt, the expected code takes lzcntq of each input and then does
; the OR and the shift by 6 on the 32-bit sub-registers (a 64-bit lzcnt yields
; 64 only for a zero input, so bit 6 carries the result).
; Without fast-lzcnt, the expected code is testq/sete per input, combined with
; orb and widened with movzbl.
52 define i64 @test_zext_cmp2(i64 %a, i64 %b) {
53 ; FASTLZCNT-LABEL: test_zext_cmp2:
55 ; FASTLZCNT-NEXT: lzcntq %rdi, %rcx
56 ; FASTLZCNT-NEXT: lzcntq %rsi, %rax
57 ; FASTLZCNT-NEXT: orl %ecx, %eax
58 ; FASTLZCNT-NEXT: shrl $6, %eax
59 ; FASTLZCNT-NEXT: retq
61 ; NOFASTLZCNT-LABEL: test_zext_cmp2:
62 ; NOFASTLZCNT: # %bb.0:
63 ; NOFASTLZCNT-NEXT: testq %rdi, %rdi
64 ; NOFASTLZCNT-NEXT: sete %al
65 ; NOFASTLZCNT-NEXT: testq %rsi, %rsi
66 ; NOFASTLZCNT-NEXT: sete %cl
67 ; NOFASTLZCNT-NEXT: orb %al, %cl
68 ; NOFASTLZCNT-NEXT: movzbl %cl, %eax
69 ; NOFASTLZCNT-NEXT: retq
70 %cmp = icmp eq i64 %a, 0
71 %cmp1 = icmp eq i64 %b, 0
72 %or = or i1 %cmp, %cmp1
73 %lor.ext = zext i1 %or to i64
77 ; Test two 16-bit inputs, output is 16-bit.
78 ; The transform is disabled for the 16-bit case, as we still have to clear the
79 ; upper 16 bits, adding one more instruction.
; The shared ALL checks therefore expect the testw/orb/movzbl sequence both with
; and without fast-lzcnt.
80 define i16 @test_zext_cmp3(i16 %a, i16 %b) {
81 ; ALL-LABEL: test_zext_cmp3:
83 ; ALL-NEXT: testw %di, %di
85 ; ALL-NEXT: testw %si, %si
87 ; ALL-NEXT: orb %al, %cl
88 ; ALL-NEXT: movzbl %cl, %eax
89 ; ALL-NEXT: # kill: def $ax killed $ax killed $eax
91 %cmp = icmp eq i16 %a, 0
92 %cmp1 = icmp eq i16 %b, 0
93 %or = or i1 %cmp, %cmp1
94 %lor.ext = zext i1 %or to i16
98 ; Test two 32-bit inputs, output is 64-bit.
; The i1 result is zero-extended to i64. With fast-lzcnt, the expected code is
; the 32-bit lzcntl/orl/shrl $5 sequence followed directly by retq, with no
; separate widening instruction.
; Without fast-lzcnt, the expected code is testl/sete per input, combined with
; orb and widened with movzbl.
99 define i64 @test_zext_cmp4(i32 %a, i32 %b) {
100 ; FASTLZCNT-LABEL: test_zext_cmp4:
101 ; FASTLZCNT: # %bb.0: # %entry
102 ; FASTLZCNT-NEXT: lzcntl %edi, %ecx
103 ; FASTLZCNT-NEXT: lzcntl %esi, %eax
104 ; FASTLZCNT-NEXT: orl %ecx, %eax
105 ; FASTLZCNT-NEXT: shrl $5, %eax
106 ; FASTLZCNT-NEXT: retq
108 ; NOFASTLZCNT-LABEL: test_zext_cmp4:
109 ; NOFASTLZCNT: # %bb.0: # %entry
110 ; NOFASTLZCNT-NEXT: testl %edi, %edi
111 ; NOFASTLZCNT-NEXT: sete %al
112 ; NOFASTLZCNT-NEXT: testl %esi, %esi
113 ; NOFASTLZCNT-NEXT: sete %cl
114 ; NOFASTLZCNT-NEXT: orb %al, %cl
115 ; NOFASTLZCNT-NEXT: movzbl %cl, %eax
116 ; NOFASTLZCNT-NEXT: retq
118 %cmp = icmp eq i32 %a, 0
119 %cmp1 = icmp eq i32 %b, 0
120 %0 = or i1 %cmp, %cmp1
121 %conv = zext i1 %0 to i64
125 ; Test two 64-bit inputs, output is 32-bit.
; The inputs are i64 but the zero-extended result is i32. With fast-lzcnt, the
; expected code uses lzcntq on each input, then a 32-bit orl and shrl $6.
; Without fast-lzcnt, the expected code is testq/sete per input, combined with
; orb and widened with movzbl.
126 define i32 @test_zext_cmp5(i64 %a, i64 %b) {
127 ; FASTLZCNT-LABEL: test_zext_cmp5:
128 ; FASTLZCNT: # %bb.0: # %entry
129 ; FASTLZCNT-NEXT: lzcntq %rdi, %rcx
130 ; FASTLZCNT-NEXT: lzcntq %rsi, %rax
131 ; FASTLZCNT-NEXT: orl %ecx, %eax
132 ; FASTLZCNT-NEXT: shrl $6, %eax
133 ; FASTLZCNT-NEXT: # kill: def $eax killed $eax killed $rax
134 ; FASTLZCNT-NEXT: retq
136 ; NOFASTLZCNT-LABEL: test_zext_cmp5:
137 ; NOFASTLZCNT: # %bb.0: # %entry
138 ; NOFASTLZCNT-NEXT: testq %rdi, %rdi
139 ; NOFASTLZCNT-NEXT: sete %al
140 ; NOFASTLZCNT-NEXT: testq %rsi, %rsi
141 ; NOFASTLZCNT-NEXT: sete %cl
142 ; NOFASTLZCNT-NEXT: orb %al, %cl
143 ; NOFASTLZCNT-NEXT: movzbl %cl, %eax
144 ; NOFASTLZCNT-NEXT: retq
146 %cmp = icmp eq i64 %a, 0
147 %cmp1 = icmp eq i64 %b, 0
148 %0 = or i1 %cmp, %cmp1
149 %lor.ext = zext i1 %0 to i32
153 ; Test three 32-bit inputs, output is 32-bit.
; The three compares are chained through two `or i1` instructions. With
; fast-lzcnt, the expected code is three lzcntl, two orl and one final shrl $5.
; Without fast-lzcnt, the expected code is three testl/sete pairs merged by two
; orb instructions and widened with movzbl.
154 define i32 @test_zext_cmp6(i32 %a, i32 %b, i32 %c) {
155 ; FASTLZCNT-LABEL: test_zext_cmp6:
156 ; FASTLZCNT: # %bb.0: # %entry
157 ; FASTLZCNT-NEXT: lzcntl %edi, %eax
158 ; FASTLZCNT-NEXT: lzcntl %esi, %ecx
159 ; FASTLZCNT-NEXT: orl %eax, %ecx
160 ; FASTLZCNT-NEXT: lzcntl %edx, %eax
161 ; FASTLZCNT-NEXT: orl %ecx, %eax
162 ; FASTLZCNT-NEXT: shrl $5, %eax
163 ; FASTLZCNT-NEXT: retq
165 ; NOFASTLZCNT-LABEL: test_zext_cmp6:
166 ; NOFASTLZCNT: # %bb.0: # %entry
167 ; NOFASTLZCNT-NEXT: testl %edi, %edi
168 ; NOFASTLZCNT-NEXT: sete %al
169 ; NOFASTLZCNT-NEXT: testl %esi, %esi
170 ; NOFASTLZCNT-NEXT: sete %cl
171 ; NOFASTLZCNT-NEXT: orb %al, %cl
172 ; NOFASTLZCNT-NEXT: testl %edx, %edx
173 ; NOFASTLZCNT-NEXT: sete %al
174 ; NOFASTLZCNT-NEXT: orb %cl, %al
175 ; NOFASTLZCNT-NEXT: movzbl %al, %eax
176 ; NOFASTLZCNT-NEXT: retq
178 %cmp = icmp eq i32 %a, 0
179 %cmp1 = icmp eq i32 %b, 0
180 %or.cond = or i1 %cmp, %cmp1
181 %cmp2 = icmp eq i32 %c, 0
182 %.cmp2 = or i1 %or.cond, %cmp2
183 %lor.ext = zext i1 %.cmp2 to i32
187 ; Test three 32-bit inputs, output is 32-bit. Compared to test_zext_cmp6, the
188 ; operand order of the final `or` (%.cmp2) is inverted.
; The expected code under both check prefixes is the same as for
; test_zext_cmp6, i.e. the operand order of the `or` does not change the result.
189 define i32 @test_zext_cmp7(i32 %a, i32 %b, i32 %c) {
190 ; FASTLZCNT-LABEL: test_zext_cmp7:
191 ; FASTLZCNT: # %bb.0: # %entry
192 ; FASTLZCNT-NEXT: lzcntl %edi, %eax
193 ; FASTLZCNT-NEXT: lzcntl %esi, %ecx
194 ; FASTLZCNT-NEXT: orl %eax, %ecx
195 ; FASTLZCNT-NEXT: lzcntl %edx, %eax
196 ; FASTLZCNT-NEXT: orl %ecx, %eax
197 ; FASTLZCNT-NEXT: shrl $5, %eax
198 ; FASTLZCNT-NEXT: retq
200 ; NOFASTLZCNT-LABEL: test_zext_cmp7:
201 ; NOFASTLZCNT: # %bb.0: # %entry
202 ; NOFASTLZCNT-NEXT: testl %edi, %edi
203 ; NOFASTLZCNT-NEXT: sete %al
204 ; NOFASTLZCNT-NEXT: testl %esi, %esi
205 ; NOFASTLZCNT-NEXT: sete %cl
206 ; NOFASTLZCNT-NEXT: orb %al, %cl
207 ; NOFASTLZCNT-NEXT: testl %edx, %edx
208 ; NOFASTLZCNT-NEXT: sete %al
209 ; NOFASTLZCNT-NEXT: orb %cl, %al
210 ; NOFASTLZCNT-NEXT: movzbl %al, %eax
211 ; NOFASTLZCNT-NEXT: retq
213 %cmp = icmp eq i32 %a, 0
214 %cmp1 = icmp eq i32 %b, 0
215 %or.cond = or i1 %cmp, %cmp1
216 %cmp2 = icmp eq i32 %c, 0
217 %.cmp2 = or i1 %cmp2, %or.cond
218 %lor.ext = zext i1 %.cmp2 to i32
222 ; Test four 32-bit inputs, output is 32-bit.
; The four compares are chained through three `or i1` instructions. With
; fast-lzcnt, the expected code is four lzcntl, three orl and one final
; shrl $5; the counts are ORed pairwise (a with b, c with d) before the final
; orl. Without fast-lzcnt, the expected code is four testl/sete pairs merged by
; three orb instructions and widened with movzbl.
223 define i32 @test_zext_cmp8(i32 %a, i32 %b, i32 %c, i32 %d) {
224 ; FASTLZCNT-LABEL: test_zext_cmp8:
225 ; FASTLZCNT: # %bb.0: # %entry
226 ; FASTLZCNT-NEXT: lzcntl %edi, %eax
227 ; FASTLZCNT-NEXT: lzcntl %esi, %esi
228 ; FASTLZCNT-NEXT: lzcntl %edx, %edx
229 ; FASTLZCNT-NEXT: orl %eax, %esi
230 ; FASTLZCNT-NEXT: lzcntl %ecx, %eax
231 ; FASTLZCNT-NEXT: orl %edx, %eax
232 ; FASTLZCNT-NEXT: orl %esi, %eax
233 ; FASTLZCNT-NEXT: shrl $5, %eax
234 ; FASTLZCNT-NEXT: retq
236 ; NOFASTLZCNT-LABEL: test_zext_cmp8:
237 ; NOFASTLZCNT: # %bb.0: # %entry
238 ; NOFASTLZCNT-NEXT: testl %edi, %edi
239 ; NOFASTLZCNT-NEXT: sete %dil
240 ; NOFASTLZCNT-NEXT: testl %esi, %esi
241 ; NOFASTLZCNT-NEXT: sete %al
242 ; NOFASTLZCNT-NEXT: orb %dil, %al
243 ; NOFASTLZCNT-NEXT: testl %edx, %edx
244 ; NOFASTLZCNT-NEXT: sete %dl
245 ; NOFASTLZCNT-NEXT: testl %ecx, %ecx
246 ; NOFASTLZCNT-NEXT: sete %cl
247 ; NOFASTLZCNT-NEXT: orb %dl, %cl
248 ; NOFASTLZCNT-NEXT: orb %al, %cl
249 ; NOFASTLZCNT-NEXT: movzbl %cl, %eax
250 ; NOFASTLZCNT-NEXT: retq
252 %cmp = icmp eq i32 %a, 0
253 %cmp1 = icmp eq i32 %b, 0
254 %or.cond = or i1 %cmp, %cmp1
255 %cmp3 = icmp eq i32 %c, 0
256 %or.cond5 = or i1 %or.cond, %cmp3
257 %cmp4 = icmp eq i32 %d, 0
258 %.cmp4 = or i1 %or.cond5, %cmp4
259 %lor.ext = zext i1 %.cmp4 to i32
263 ; Test one 32-bit input, one 64-bit input, output is 32-bit.
; Because the inputs differ in width, the expected fast-lzcnt code shifts each
; count by its own amount (shrl $5 after lzcntl, shrl $6 after lzcntq) and only
; then ORs the two results, instead of using one shift after the OR.
; Without fast-lzcnt, the expected code is testl/sete and testq/sete, combined
; with orb and widened with movzbl.
264 define i32 @test_zext_cmp9(i32 %a, i64 %b) {
265 ; FASTLZCNT-LABEL: test_zext_cmp9:
266 ; FASTLZCNT: # %bb.0: # %entry
267 ; FASTLZCNT-NEXT: lzcntq %rsi, %rax
268 ; FASTLZCNT-NEXT: lzcntl %edi, %ecx
269 ; FASTLZCNT-NEXT: shrl $5, %ecx
270 ; FASTLZCNT-NEXT: shrl $6, %eax
271 ; FASTLZCNT-NEXT: orl %ecx, %eax
272 ; FASTLZCNT-NEXT: # kill: def $eax killed $eax killed $rax
273 ; FASTLZCNT-NEXT: retq
275 ; NOFASTLZCNT-LABEL: test_zext_cmp9:
276 ; NOFASTLZCNT: # %bb.0: # %entry
277 ; NOFASTLZCNT-NEXT: testl %edi, %edi
278 ; NOFASTLZCNT-NEXT: sete %al
279 ; NOFASTLZCNT-NEXT: testq %rsi, %rsi
280 ; NOFASTLZCNT-NEXT: sete %cl
281 ; NOFASTLZCNT-NEXT: orb %al, %cl
282 ; NOFASTLZCNT-NEXT: movzbl %cl, %eax
283 ; NOFASTLZCNT-NEXT: retq
285 %cmp = icmp eq i32 %a, 0
286 %cmp1 = icmp eq i64 %b, 0
287 %0 = or i1 %cmp, %cmp1
288 %lor.ext = zext i1 %0 to i32
292 ; Test two 128-bit inputs, output is 32-bit, no transformations expected.
; Each i128 value is assembled from two i64 arguments (zext, shl by 64, or) and
; then compared against zero. The shared ALL checks expect the two halves of
; each value to be combined with orq and the two compare results to be merged
; with orb and widened with movzbl.
293 define i32 @test_zext_cmp10(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) {
294 ; ALL-LABEL: test_zext_cmp10:
295 ; ALL: # %bb.0: # %entry
296 ; ALL-NEXT: orq %rsi, %rdi
298 ; ALL-NEXT: orq %rcx, %rdx
300 ; ALL-NEXT: orb %al, %cl
301 ; ALL-NEXT: movzbl %cl, %eax
304 %a.sroa.2.0.insert.ext = zext i64 %a.coerce1 to i128
305 %a.sroa.2.0.insert.shift = shl nuw i128 %a.sroa.2.0.insert.ext, 64
306 %a.sroa.0.0.insert.ext = zext i64 %a.coerce0 to i128
307 %a.sroa.0.0.insert.insert = or i128 %a.sroa.2.0.insert.shift, %a.sroa.0.0.insert.ext
308 %b.sroa.2.0.insert.ext = zext i64 %b.coerce1 to i128
309 %b.sroa.2.0.insert.shift = shl nuw i128 %b.sroa.2.0.insert.ext, 64
310 %b.sroa.0.0.insert.ext = zext i64 %b.coerce0 to i128
311 %b.sroa.0.0.insert.insert = or i128 %b.sroa.2.0.insert.shift, %b.sroa.0.0.insert.ext
312 %cmp = icmp eq i128 %a.sroa.0.0.insert.insert, 0
313 %cmp3 = icmp eq i128 %b.sroa.0.0.insert.insert, 0
314 %0 = or i1 %cmp, %cmp3
315 %lor.ext = zext i1 %0 to i32
319 ; PR31902 Fix a crash in combineOrCmpEqZeroToCtlzSrl under fast math.
; The two compares feeding the `or` are floating-point (`fcmp fast oeq` against
; 0.0) rather than integer compares, and the function carries the
; "no-nans-fp-math"="true" attribute. The shared ALL checks expect each double
; to be compared with vucomisd against a register zeroed by vxorps, with the
; results merged by orb and widened with movzbl.
320 define i32 @test_zext_cmp11(double %a, double %b) "no-nans-fp-math"="true" {
322 ; ALL-LABEL: test_zext_cmp11:
323 ; ALL: # %bb.0: # %entry
324 ; ALL-NEXT: vxorps %xmm2, %xmm2, %xmm2
325 ; ALL-NEXT: vucomisd %xmm2, %xmm0
327 ; ALL-NEXT: vucomisd %xmm2, %xmm1
329 ; ALL-NEXT: orb %al, %cl
330 ; ALL-NEXT: movzbl %cl, %eax
333 %cmp = fcmp fast oeq double %a, 0.000000e+00
334 %cmp1 = fcmp fast oeq double %b, 0.000000e+00
335 %0 = or i1 %cmp, %cmp1
336 %conv = zext i1 %0 to i32