1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -O2 < %s -mtriple=aarch64-linux-gnu | FileCheck %s
; External declaration of bcmp: takes two pointers and an i64 length, returns i32.
4 declare i32 @bcmp(ptr, ptr, i64)
; bcmp with a constant length of 0, result compared for equality with 0: the expected code just materializes the constant 1 in w0.
6 define i1 @bcmp0(ptr %a, ptr %b) {
9 ; CHECK-NEXT: mov w0, #1 // =0x1
11 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 0)
12 %r = icmp eq i32 %cr, 0
; 1-byte bcmp == 0: expected to become one byte load (ldrb) per pointer followed by cmp and cset eq.
16 define i1 @bcmp1(ptr %a, ptr %b) {
19 ; CHECK-NEXT: ldrb w8, [x0]
20 ; CHECK-NEXT: ldrb w9, [x1]
21 ; CHECK-NEXT: cmp w8, w9
22 ; CHECK-NEXT: cset w0, eq
24 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 1)
25 %r = icmp eq i32 %cr, 0
; 2-byte bcmp == 0: expected to become one halfword load (ldrh) per pointer followed by cmp and cset eq.
29 define i1 @bcmp2(ptr %a, ptr %b) {
32 ; CHECK-NEXT: ldrh w8, [x0]
33 ; CHECK-NEXT: ldrh w9, [x1]
34 ; CHECK-NEXT: cmp w8, w9
35 ; CHECK-NEXT: cset w0, eq
37 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 2)
38 %r = icmp eq i32 %cr, 0
42 ; or (and (xor a, b), C1), (and (xor c, d), C2)
; 3-byte bcmp == 0: expected as a halfword load at offset 0 plus a byte load at offset 2 per pointer, combined with cmp + ccmp.
43 define i1 @bcmp3(ptr %a, ptr %b) {
46 ; CHECK-NEXT: ldrh w8, [x0]
47 ; CHECK-NEXT: ldrh w9, [x1]
48 ; CHECK-NEXT: ldrb w10, [x0, #2]
49 ; CHECK-NEXT: ldrb w11, [x1, #2]
50 ; CHECK-NEXT: cmp w8, w9
51 ; CHECK-NEXT: ccmp w10, w11, #0, eq
52 ; CHECK-NEXT: cset w0, eq
54 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 3)
55 %r = icmp eq i32 %cr, 0
; 4-byte bcmp == 0: expected to become one 32-bit load (ldr w) per pointer followed by cmp and cset eq.
59 define i1 @bcmp4(ptr %a, ptr %b) {
62 ; CHECK-NEXT: ldr w8, [x0]
63 ; CHECK-NEXT: ldr w9, [x1]
64 ; CHECK-NEXT: cmp w8, w9
65 ; CHECK-NEXT: cset w0, eq
67 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 4)
68 %r = icmp eq i32 %cr, 0
72 ; or (xor a, b), (and (xor c, d), C2)
; 5-byte bcmp == 0: expected as a 32-bit load at offset 0 plus a byte load at offset 4 per pointer, combined with cmp + ccmp.
73 define i1 @bcmp5(ptr %a, ptr %b) {
76 ; CHECK-NEXT: ldr w8, [x0]
77 ; CHECK-NEXT: ldr w9, [x1]
78 ; CHECK-NEXT: ldrb w10, [x0, #4]
79 ; CHECK-NEXT: ldrb w11, [x1, #4]
80 ; CHECK-NEXT: cmp w8, w9
81 ; CHECK-NEXT: ccmp w10, w11, #0, eq
82 ; CHECK-NEXT: cset w0, eq
84 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 5)
85 %r = icmp eq i32 %cr, 0
89 ; or (xor a, b), (and (xor c, d), C2)
; 6-byte bcmp == 0: expected as a 32-bit load at offset 0 plus a halfword load at offset 4 per pointer, combined with cmp + ccmp.
90 define i1 @bcmp6(ptr %a, ptr %b) {
93 ; CHECK-NEXT: ldr w8, [x0]
94 ; CHECK-NEXT: ldr w9, [x1]
95 ; CHECK-NEXT: ldrh w10, [x0, #4]
96 ; CHECK-NEXT: ldrh w11, [x1, #4]
97 ; CHECK-NEXT: cmp w8, w9
98 ; CHECK-NEXT: ccmp w10, w11, #0, eq
99 ; CHECK-NEXT: cset w0, eq
101 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 6)
102 %r = icmp eq i32 %cr, 0
106 ; or (xor a, b), (xor c, d)
; 7-byte bcmp == 0: expected as two overlapping 32-bit loads per pointer (offset 0 via ldr, offset 3 via ldur), combined with cmp + ccmp.
107 define i1 @bcmp7(ptr %a, ptr %b) {
108 ; CHECK-LABEL: bcmp7:
110 ; CHECK-NEXT: ldr w8, [x0]
111 ; CHECK-NEXT: ldr w9, [x1]
112 ; CHECK-NEXT: ldur w10, [x0, #3]
113 ; CHECK-NEXT: ldur w11, [x1, #3]
114 ; CHECK-NEXT: cmp w8, w9
115 ; CHECK-NEXT: ccmp w10, w11, #0, eq
116 ; CHECK-NEXT: cset w0, eq
118 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 7)
119 %r = icmp eq i32 %cr, 0
; 8-byte bcmp == 0: expected to become one 64-bit load (ldr x) per pointer followed by cmp and cset eq.
123 define i1 @bcmp8(ptr %a, ptr %b) {
124 ; CHECK-LABEL: bcmp8:
126 ; CHECK-NEXT: ldr x8, [x0]
127 ; CHECK-NEXT: ldr x9, [x1]
128 ; CHECK-NEXT: cmp x8, x9
129 ; CHECK-NEXT: cset w0, eq
131 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 8)
132 %r = icmp eq i32 %cr, 0
136 ; or (xor a, b), (and (xor c, d), C2)
; 9-byte bcmp == 0: expected as a 64-bit load at offset 0 plus a byte load at offset 8 per pointer; the byte values are compared through their 64-bit registers (x10/x11) in the ccmp.
137 define i1 @bcmp9(ptr %a, ptr %b) {
138 ; CHECK-LABEL: bcmp9:
140 ; CHECK-NEXT: ldr x8, [x0]
141 ; CHECK-NEXT: ldr x9, [x1]
142 ; CHECK-NEXT: ldrb w10, [x0, #8]
143 ; CHECK-NEXT: ldrb w11, [x1, #8]
144 ; CHECK-NEXT: cmp x8, x9
145 ; CHECK-NEXT: ccmp x10, x11, #0, eq
146 ; CHECK-NEXT: cset w0, eq
148 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 9)
149 %r = icmp eq i32 %cr, 0
; 10-byte bcmp == 0: expected as a 64-bit load at offset 0 plus a halfword load at offset 8 per pointer, combined with cmp + ccmp.
153 define i1 @bcmp10(ptr %a, ptr %b) {
154 ; CHECK-LABEL: bcmp10:
156 ; CHECK-NEXT: ldr x8, [x0]
157 ; CHECK-NEXT: ldr x9, [x1]
158 ; CHECK-NEXT: ldrh w10, [x0, #8]
159 ; CHECK-NEXT: ldrh w11, [x1, #8]
160 ; CHECK-NEXT: cmp x8, x9
161 ; CHECK-NEXT: ccmp x10, x11, #0, eq
162 ; CHECK-NEXT: cset w0, eq
164 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 10)
165 %r = icmp eq i32 %cr, 0
; 11-byte bcmp == 0: expected as two overlapping 64-bit loads per pointer (offset 0 via ldr, offset 3 via ldur), combined with cmp + ccmp.
169 define i1 @bcmp11(ptr %a, ptr %b) {
170 ; CHECK-LABEL: bcmp11:
172 ; CHECK-NEXT: ldr x8, [x0]
173 ; CHECK-NEXT: ldr x9, [x1]
174 ; CHECK-NEXT: ldur x10, [x0, #3]
175 ; CHECK-NEXT: ldur x11, [x1, #3]
176 ; CHECK-NEXT: cmp x8, x9
177 ; CHECK-NEXT: ccmp x10, x11, #0, eq
178 ; CHECK-NEXT: cset w0, eq
180 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 11)
181 %r = icmp eq i32 %cr, 0
; 12-byte bcmp == 0: expected as a 64-bit load at offset 0 plus a 32-bit load at offset 8 per pointer, combined with cmp + ccmp.
185 define i1 @bcmp12(ptr %a, ptr %b) {
186 ; CHECK-LABEL: bcmp12:
188 ; CHECK-NEXT: ldr x8, [x0]
189 ; CHECK-NEXT: ldr x9, [x1]
190 ; CHECK-NEXT: ldr w10, [x0, #8]
191 ; CHECK-NEXT: ldr w11, [x1, #8]
192 ; CHECK-NEXT: cmp x8, x9
193 ; CHECK-NEXT: ccmp x10, x11, #0, eq
194 ; CHECK-NEXT: cset w0, eq
196 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 12)
197 %r = icmp eq i32 %cr, 0
; 13-byte bcmp == 0: expected as two overlapping 64-bit loads per pointer (offset 0 via ldr, offset 5 via ldur), combined with cmp + ccmp.
201 define i1 @bcmp13(ptr %a, ptr %b) {
202 ; CHECK-LABEL: bcmp13:
204 ; CHECK-NEXT: ldr x8, [x0]
205 ; CHECK-NEXT: ldr x9, [x1]
206 ; CHECK-NEXT: ldur x10, [x0, #5]
207 ; CHECK-NEXT: ldur x11, [x1, #5]
208 ; CHECK-NEXT: cmp x8, x9
209 ; CHECK-NEXT: ccmp x10, x11, #0, eq
210 ; CHECK-NEXT: cset w0, eq
212 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 13)
213 %r = icmp eq i32 %cr, 0
; 14-byte bcmp == 0: expected as two overlapping 64-bit loads per pointer (offset 0 via ldr, offset 6 via ldur), combined with cmp + ccmp.
217 define i1 @bcmp14(ptr %a, ptr %b) {
218 ; CHECK-LABEL: bcmp14:
220 ; CHECK-NEXT: ldr x8, [x0]
221 ; CHECK-NEXT: ldr x9, [x1]
222 ; CHECK-NEXT: ldur x10, [x0, #6]
223 ; CHECK-NEXT: ldur x11, [x1, #6]
224 ; CHECK-NEXT: cmp x8, x9
225 ; CHECK-NEXT: ccmp x10, x11, #0, eq
226 ; CHECK-NEXT: cset w0, eq
228 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 14)
229 %r = icmp eq i32 %cr, 0
; 15-byte bcmp == 0: expected as two overlapping 64-bit loads per pointer (offset 0 via ldr, offset 7 via ldur), combined with cmp + ccmp.
233 define i1 @bcmp15(ptr %a, ptr %b) {
234 ; CHECK-LABEL: bcmp15:
236 ; CHECK-NEXT: ldr x8, [x0]
237 ; CHECK-NEXT: ldr x9, [x1]
238 ; CHECK-NEXT: ldur x10, [x0, #7]
239 ; CHECK-NEXT: ldur x11, [x1, #7]
240 ; CHECK-NEXT: cmp x8, x9
241 ; CHECK-NEXT: ccmp x10, x11, #0, eq
242 ; CHECK-NEXT: cset w0, eq
244 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 15)
245 %r = icmp eq i32 %cr, 0
; 16-byte bcmp == 0: expected as one load-pair (ldp) of two 64-bit registers per pointer, combined with cmp + ccmp.
249 define i1 @bcmp16(ptr %a, ptr %b) {
250 ; CHECK-LABEL: bcmp16:
252 ; CHECK-NEXT: ldp x8, x11, [x1]
253 ; CHECK-NEXT: ldp x9, x10, [x0]
254 ; CHECK-NEXT: cmp x9, x8
255 ; CHECK-NEXT: ccmp x10, x11, #0, eq
256 ; CHECK-NEXT: cset w0, eq
258 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 16)
259 %r = icmp eq i32 %cr, 0
; 20-byte bcmp == 0: expected as one ldp plus a 32-bit load at offset 16 per pointer, combined with cmp and two ccmp.
263 define i1 @bcmp20(ptr %a, ptr %b) {
264 ; CHECK-LABEL: bcmp20:
266 ; CHECK-NEXT: ldp x8, x11, [x1]
267 ; CHECK-NEXT: ldr w12, [x0, #16]
268 ; CHECK-NEXT: ldp x9, x10, [x0]
269 ; CHECK-NEXT: ldr w13, [x1, #16]
270 ; CHECK-NEXT: cmp x9, x8
271 ; CHECK-NEXT: ccmp x10, x11, #0, eq
272 ; CHECK-NEXT: ccmp x12, x13, #0, eq
273 ; CHECK-NEXT: cset w0, eq
275 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 20)
276 %r = icmp eq i32 %cr, 0
; 24-byte bcmp == 0: expected as one ldp plus a 64-bit load at offset 16 per pointer, combined with cmp and two ccmp.
280 define i1 @bcmp24(ptr %a, ptr %b) {
281 ; CHECK-LABEL: bcmp24:
283 ; CHECK-NEXT: ldp x8, x11, [x1]
284 ; CHECK-NEXT: ldr x12, [x0, #16]
285 ; CHECK-NEXT: ldp x9, x10, [x0]
286 ; CHECK-NEXT: ldr x13, [x1, #16]
287 ; CHECK-NEXT: cmp x9, x8
288 ; CHECK-NEXT: ccmp x10, x11, #0, eq
289 ; CHECK-NEXT: ccmp x12, x13, #0, eq
290 ; CHECK-NEXT: cset w0, eq
292 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 24)
293 %r = icmp eq i32 %cr, 0
; 28-byte bcmp == 0: expected as one ldp, a 64-bit load at offset 16 and a 32-bit load at offset 24 per pointer, combined with cmp and three ccmp.
297 define i1 @bcmp28(ptr %a, ptr %b) {
298 ; CHECK-LABEL: bcmp28:
300 ; CHECK-NEXT: ldp x8, x11, [x1]
301 ; CHECK-NEXT: ldr x12, [x0, #16]
302 ; CHECK-NEXT: ldp x9, x10, [x0]
303 ; CHECK-NEXT: ldr x13, [x1, #16]
304 ; CHECK-NEXT: cmp x9, x8
305 ; CHECK-NEXT: ldr w8, [x0, #24]
306 ; CHECK-NEXT: ldr w9, [x1, #24]
307 ; CHECK-NEXT: ccmp x10, x11, #0, eq
308 ; CHECK-NEXT: ccmp x12, x13, #0, eq
309 ; CHECK-NEXT: ccmp x8, x9, #0, eq
310 ; CHECK-NEXT: cset w0, eq
312 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 28)
313 %r = icmp eq i32 %cr, 0
; 33-byte bcmp == 0: expected as two ldp (offsets 0 and 16) plus a byte load at offset 32 per pointer, combined with cmp and four ccmp.
317 define i1 @bcmp33(ptr %a, ptr %b) {
318 ; CHECK-LABEL: bcmp33:
320 ; CHECK-NEXT: ldp x8, x11, [x1]
321 ; CHECK-NEXT: ldp x9, x10, [x0]
322 ; CHECK-NEXT: ldp x12, x13, [x1, #16]
323 ; CHECK-NEXT: cmp x9, x8
324 ; CHECK-NEXT: ldp x8, x9, [x0, #16]
325 ; CHECK-NEXT: ccmp x10, x11, #0, eq
326 ; CHECK-NEXT: ldrb w10, [x0, #32]
327 ; CHECK-NEXT: ldrb w11, [x1, #32]
328 ; CHECK-NEXT: ccmp x8, x12, #0, eq
329 ; CHECK-NEXT: ccmp x9, x13, #0, eq
330 ; CHECK-NEXT: ccmp x10, x11, #0, eq
331 ; CHECK-NEXT: cset w0, eq
333 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 33)
334 %r = icmp eq i32 %cr, 0
; 38-byte bcmp == 0: expected as two ldp (offsets 0 and 16) plus an overlapping 64-bit ldur at offset 30 per pointer, combined with cmp and four ccmp.
338 define i1 @bcmp38(ptr %a, ptr %b) {
339 ; CHECK-LABEL: bcmp38:
341 ; CHECK-NEXT: ldp x8, x11, [x1]
342 ; CHECK-NEXT: ldp x9, x10, [x0]
343 ; CHECK-NEXT: ldp x12, x13, [x1, #16]
344 ; CHECK-NEXT: cmp x9, x8
345 ; CHECK-NEXT: ldp x8, x9, [x0, #16]
346 ; CHECK-NEXT: ccmp x10, x11, #0, eq
347 ; CHECK-NEXT: ldur x10, [x0, #30]
348 ; CHECK-NEXT: ldur x11, [x1, #30]
349 ; CHECK-NEXT: ccmp x8, x12, #0, eq
350 ; CHECK-NEXT: ccmp x9, x13, #0, eq
351 ; CHECK-NEXT: ccmp x10, x11, #0, eq
352 ; CHECK-NEXT: cset w0, eq
354 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 38)
355 %r = icmp eq i32 %cr, 0
; 45-byte bcmp == 0: expected as two ldp (offsets 0 and 16), a 64-bit load at offset 32 and an overlapping ldur at offset 37 per pointer, combined with cmp and five ccmp.
359 define i1 @bcmp45(ptr %a, ptr %b) {
360 ; CHECK-LABEL: bcmp45:
362 ; CHECK-NEXT: ldp x8, x11, [x1]
363 ; CHECK-NEXT: ldp x9, x10, [x0]
364 ; CHECK-NEXT: ldp x12, x13, [x1, #16]
365 ; CHECK-NEXT: cmp x9, x8
366 ; CHECK-NEXT: ldp x8, x9, [x0, #16]
367 ; CHECK-NEXT: ccmp x10, x11, #0, eq
368 ; CHECK-NEXT: ldr x10, [x0, #32]
369 ; CHECK-NEXT: ldr x11, [x1, #32]
370 ; CHECK-NEXT: ccmp x8, x12, #0, eq
371 ; CHECK-NEXT: ldur x8, [x0, #37]
372 ; CHECK-NEXT: ldur x12, [x1, #37]
373 ; CHECK-NEXT: ccmp x9, x13, #0, eq
374 ; CHECK-NEXT: ccmp x10, x11, #0, eq
375 ; CHECK-NEXT: ccmp x8, x12, #0, eq
376 ; CHECK-NEXT: cset w0, eq
378 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 45)
379 %r = icmp eq i32 %cr, 0
383 ; Although the large cmp chain may not be profitable on high-end CPUs, we
384 ; believe it is better on most CPUs, so perform the transform now.
385 ; 8 xor + 7 or + 1 cmp only need 6 cycles on a machine with 4 ALU ports
; 64-byte bcmp == 0: expected as four ldp per pointer (offsets 0, 16, 32, 48), combined with one cmp and seven ccmp, with no call to bcmp.
389 define i1 @bcmp64(ptr %a, ptr %b) {
390 ; CHECK-LABEL: bcmp64:
392 ; CHECK-NEXT: ldp x8, x11, [x1]
393 ; CHECK-NEXT: ldp x9, x10, [x0]
394 ; CHECK-NEXT: ldp x12, x13, [x1, #16]
395 ; CHECK-NEXT: cmp x9, x8
396 ; CHECK-NEXT: ldp x8, x9, [x0, #16]
397 ; CHECK-NEXT: ccmp x10, x11, #0, eq
398 ; CHECK-NEXT: ccmp x8, x12, #0, eq
399 ; CHECK-NEXT: ldp x8, x11, [x0, #32]
400 ; CHECK-NEXT: ldp x10, x12, [x1, #32]
401 ; CHECK-NEXT: ccmp x9, x13, #0, eq
402 ; CHECK-NEXT: ldp x9, x13, [x1, #48]
403 ; CHECK-NEXT: ccmp x8, x10, #0, eq
404 ; CHECK-NEXT: ldp x8, x10, [x0, #48]
405 ; CHECK-NEXT: ccmp x11, x12, #0, eq
406 ; CHECK-NEXT: ccmp x8, x9, #0, eq
407 ; CHECK-NEXT: ccmp x10, x13, #0, eq
408 ; CHECK-NEXT: cset w0, eq
410 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 64)
411 %r = icmp eq i32 %cr, 0
; 89-byte bcmp == 0: not expanded inline here; the expected code saves x30, passes the length 89 in w2, calls bcmp, and tests the result against 0.
415 define i1 @bcmp89(ptr %a, ptr %b) {
416 ; CHECK-LABEL: bcmp89:
418 ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
419 ; CHECK-NEXT: .cfi_def_cfa_offset 16
420 ; CHECK-NEXT: .cfi_offset w30, -16
421 ; CHECK-NEXT: mov w2, #89 // =0x59
422 ; CHECK-NEXT: bl bcmp
423 ; CHECK-NEXT: cmp w0, #0
424 ; CHECK-NEXT: cset w0, eq
425 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
427 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 89)
428 %r = icmp eq i32 %cr, 0
; NOTE(review): only part of this function's IR is visible in this view. The expected code masks the two i8 arguments to 8 bits and compares both argument pairs (w1/w0, then w9/w8) with cmp + ccmp.
432 define i1 @bcmp_zext(i32 %0, i32 %1, i8 %2, i8 %3) {
433 ; CHECK-LABEL: bcmp_zext:
435 ; CHECK-NEXT: and w8, w2, #0xff
436 ; CHECK-NEXT: and w9, w3, #0xff
437 ; CHECK-NEXT: cmp w1, w0
438 ; CHECK-NEXT: ccmp w9, w8, #0, eq
439 ; CHECK-NEXT: cset w0, eq
443 %7 = zext i8 %6 to i32
445 %9 = icmp eq i32 %8, 0
; Hand-written or-of-xors over three i8 pairs, tested == 0: expected as a cmp + two ccmp chain on the operands masked/extended to 8 bits, ending in cset eq.
449 define i1 @bcmp_i8(i8 %a0, i8 %b0, i8 %a1, i8 %b1, i8 %a2, i8 %b2) {
450 ; CHECK-LABEL: bcmp_i8:
452 ; CHECK-NEXT: and w8, w1, #0xff
453 ; CHECK-NEXT: and w9, w2, #0xff
454 ; CHECK-NEXT: and w10, w3, #0xff
455 ; CHECK-NEXT: cmp w8, w0, uxtb
456 ; CHECK-NEXT: and w8, w4, #0xff
457 ; CHECK-NEXT: and w11, w5, #0xff
458 ; CHECK-NEXT: ccmp w10, w9, #0, eq
459 ; CHECK-NEXT: ccmp w11, w8, #0, eq
460 ; CHECK-NEXT: cset w0, eq
462 %xor0 = xor i8 %b0, %a0
463 %xor1 = xor i8 %b1, %a1
464 %xor2 = xor i8 %b2, %a2
465 %or0 = or i8 %xor0, %xor1
466 %or1 = or i8 %or0, %xor2
467 %r = icmp eq i8 %or1, 0
; Hand-written or-of-xors over three i16 pairs, tested == 0: expected as a cmp + two ccmp chain on the operands masked/extended to 16 bits, ending in cset eq.
471 define i1 @bcmp_i16(i16 %a0, i16 %b0, i16 %a1, i16 %b1, i16 %a2, i16 %b2) {
472 ; CHECK-LABEL: bcmp_i16:
474 ; CHECK-NEXT: and w8, w1, #0xffff
475 ; CHECK-NEXT: and w9, w2, #0xffff
476 ; CHECK-NEXT: and w10, w3, #0xffff
477 ; CHECK-NEXT: cmp w8, w0, uxth
478 ; CHECK-NEXT: and w8, w4, #0xffff
479 ; CHECK-NEXT: and w11, w5, #0xffff
480 ; CHECK-NEXT: ccmp w10, w9, #0, eq
481 ; CHECK-NEXT: ccmp w11, w8, #0, eq
482 ; CHECK-NEXT: cset w0, eq
484 %xor0 = xor i16 %b0, %a0
485 %xor1 = xor i16 %b1, %a1
486 %xor2 = xor i16 %b2, %a2
487 %or0 = or i16 %xor0, %xor1
488 %or1 = or i16 %or0, %xor2
489 %r = icmp eq i16 %or1, 0
; Or-of-xors over three i128 pairs, tested != 0: the first two pairs are compared in registers x0-x7, the third pair is loaded from the stack with ldp, and the two partial results are merged with cset ne + csinc.
493 define i1 @bcmp_i128(i128 %a0, i128 %b0, i128 %a1, i128 %b1, i128 %a2, i128 %b2) {
494 ; CHECK-LABEL: bcmp_i128:
496 ; CHECK-NEXT: cmp x2, x0
497 ; CHECK-NEXT: ldp x8, x10, [sp]
498 ; CHECK-NEXT: ccmp x3, x1, #0, eq
499 ; CHECK-NEXT: ldp x9, x11, [sp, #16]
500 ; CHECK-NEXT: ccmp x6, x4, #0, eq
501 ; CHECK-NEXT: ccmp x7, x5, #0, eq
502 ; CHECK-NEXT: cset w12, ne
503 ; CHECK-NEXT: cmp x9, x8
504 ; CHECK-NEXT: ccmp x11, x10, #0, eq
505 ; CHECK-NEXT: csinc w0, w12, wzr, eq
507 %xor0 = xor i128 %b0, %a0
508 %xor1 = xor i128 %b1, %a1
509 %xor2 = xor i128 %b2, %a2
510 %or0 = or i128 %xor0, %xor1
511 %or1 = or i128 %or0, %xor2
512 %r = icmp ne i128 %or1, 0
; Or-of-xors over three i42 pairs, tested != 0: each operand is masked with 0x3ffffffffff (the low 42 bits) before a cmp + two ccmp chain, ending in cset ne.
516 define i1 @bcmp_i42(i42 %a0, i42 %b0, i42 %a1, i42 %b1, i42 %a2, i42 %b2) {
517 ; CHECK-LABEL: bcmp_i42:
519 ; CHECK-NEXT: and x8, x0, #0x3ffffffffff
520 ; CHECK-NEXT: and x9, x1, #0x3ffffffffff
521 ; CHECK-NEXT: and x10, x2, #0x3ffffffffff
522 ; CHECK-NEXT: and x11, x3, #0x3ffffffffff
523 ; CHECK-NEXT: cmp x9, x8
524 ; CHECK-NEXT: and x8, x4, #0x3ffffffffff
525 ; CHECK-NEXT: and x9, x5, #0x3ffffffffff
526 ; CHECK-NEXT: ccmp x11, x10, #0, eq
527 ; CHECK-NEXT: ccmp x9, x8, #0, eq
528 ; CHECK-NEXT: cset w0, ne
530 %xor0 = xor i42 %b0, %a0
531 %xor1 = xor i42 %b1, %a1
532 %xor2 = xor i42 %b2, %a2
533 %or0 = or i42 %xor0, %xor1
534 %or1 = or i42 %or0, %xor2
535 %r = icmp ne i42 %or1, 0