1 ; RUN: llc < %s -O3 -mtriple=arm64-unknown-unknown -mcpu=cyclone -pre-RA-sched=list-hybrid | FileCheck %s
2 ; <rdar://problem/11635990> [arm64] [lsr] Inefficient EA/loop-exit calc in bzero_phys
4 ; LSR on loop %while.cond should reassociate non-address mode
5 ; expressions at use %cmp16 to avoid sinking computation into %while.body18.
7 ; Remove the -pre-RA-sched=list-hybrid option after fixing:
8 ; <rdar://problem/12702735> [ARM64][coalescer] need better register
9 ; coalescing for simple unit tests.
12 ; CHECK: %while.body18{{$}}
13 ; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #8
14 ; First set the IVREG variable, then use it
15 ; CHECK-NEXT: sub [[IVREG:x[0-9]+]],
16 ; CHECK: [[IVREG]], #8
17 ; CHECK-NEXT: cmp [[IVREG]], #7
; Test input: a memset-style routine that writes the low byte of %val over
; %len bytes starting at %dest. It is structured as three loops: a
; byte-at-a-time head loop (%while.cond/%while.body), an 8-bytes-per-iteration
; wide loop (%while.body18, the loop the CHECK lines at the top of this file
; match), and a byte-at-a-time tail loop (%while.body29).
; NOTE(review): the entry label, some branch terminators, the definition of
; %and, and the function's ret/closing brace are not visible in this excerpt;
; confirm against the full file before relying on the control flow described
; here.
19 define i8* @memset(i8* %dest, i32 %val, i64 %len) nounwind ssp noimplicitfloat {
; Zero-length request: go straight to %done.
21 %cmp = icmp eq i64 %len, 0
22 br i1 %cmp, label %done, label %while.cond.preheader
24 while.cond.preheader: ; preds = %entry
; %conv is the fill byte (low 8 bits of %val); it is used by both byte loops.
25 %conv = trunc i32 %val to i8
; Head loop header: %ptr.0 is the current write pointer and %len.addr.0 the
; number of bytes still to write. Exits to %done once the count reaches 0.
28 while.cond: ; preds = %while.body, %while.cond.preheader
29 %ptr.0 = phi i8* [ %incdec.ptr, %while.body ], [ %dest, %while.cond.preheader ]
30 %len.addr.0 = phi i64 [ %dec, %while.body ], [ %len, %while.cond.preheader ]
31 %cond = icmp eq i64 %len.addr.0, 0
32 br i1 %cond, label %done, label %land.rhs
; Decide between writing one more byte (%while.body) and switching to the wide
; path (%if.end9), based on %and being zero.
; NOTE(review): %and is not defined in the visible lines; presumably it is
; derived from %0 (the pointer as an integer) as an alignment test - confirm.
34 land.rhs: ; preds = %while.cond
35 %0 = ptrtoint i8* %ptr.0 to i64
37 %cmp5 = icmp eq i64 %and, 0
38 br i1 %cmp5, label %if.end9, label %while.body
; Head loop body: store one fill byte, advance the pointer by 1 and decrement
; the remaining count by 1.
40 while.body: ; preds = %land.rhs
41 %incdec.ptr = getelementptr inbounds i8, i8* %ptr.0, i64 1
42 store i8 %conv, i8* %ptr.0, align 1, !tbaa !0
43 %dec = add i64 %len.addr.0, -1
; Wide-path setup: build %ins, an i64 with the low byte of %val replicated into
; all eight byte positions (the byte shifted left by 8, 16, ..., 56 and OR'ed
; together with the unshifted byte).
46 if.end9: ; preds = %land.rhs
47 %conv.mask = and i32 %val, 255
48 %1 = zext i32 %conv.mask to i64
49 %2 = shl nuw nsw i64 %1, 8
50 %ins18 = or i64 %2, %1
51 %3 = shl nuw nsw i64 %1, 16
52 %ins15 = or i64 %ins18, %3
53 %4 = shl nuw nsw i64 %1, 24
54 %5 = shl nuw nsw i64 %1, 32
55 %mask8 = or i64 %ins15, %4
56 %6 = shl nuw nsw i64 %1, 40
57 %mask5 = or i64 %mask8, %5
58 %7 = shl nuw nsw i64 %1, 48
59 %8 = shl nuw i64 %1, 56
60 %mask2.masked = or i64 %mask5, %6
61 %mask = or i64 %mask2.masked, %7
62 %ins = or i64 %mask, %8
; Reinterpret the current pointer as i64* for 8-byte stores. Enter the wide
; loop only if more than 7 bytes remain (unsigned compare); otherwise go to the
; tail loop's preheader.
63 %9 = bitcast i8* %ptr.0 to i64*
64 %cmp1636 = icmp ugt i64 %len.addr.0, 7
65 br i1 %cmp1636, label %while.body18, label %while.body29.lr.ph
; Wide loop: store %ins (8 bytes) per iteration, advance the i64 pointer by one
; element and subtract 8 from the remaining count. The loop continues while the
; updated count %sub is still greater than 7 (unsigned). %cmp16 is the use the
; LSR comment at the top of the file refers to.
67 while.body18: ; preds = %if.end9, %while.body18
68 %wideptr.038 = phi i64* [ %incdec.ptr19, %while.body18 ], [ %9, %if.end9 ]
69 %len.addr.137 = phi i64 [ %sub, %while.body18 ], [ %len.addr.0, %if.end9 ]
70 %incdec.ptr19 = getelementptr inbounds i64, i64* %wideptr.038, i64 1
71 store i64 %ins, i64* %wideptr.038, align 8, !tbaa !2
72 %sub = add i64 %len.addr.137, -8
73 %cmp16 = icmp ugt i64 %sub, 7
74 br i1 %cmp16, label %while.body18, label %while.end20
; After the wide loop: finished if no bytes remain, otherwise fall into the
; byte tail loop.
76 while.end20: ; preds = %while.body18
77 %cmp21 = icmp eq i64 %sub, 0
78 br i1 %cmp21, label %done, label %while.body29.lr.ph
; Tail loop preheader: merge the remaining count and write pointer from the two
; incoming paths (wide loop finished, or wide loop skipped) and cast the
; pointer back to i8*.
80 while.body29.lr.ph: ; preds = %while.end20, %if.end9
81 %len.addr.1.lcssa49 = phi i64 [ %sub, %while.end20 ], [ %len.addr.0, %if.end9 ]
82 %wideptr.0.lcssa48 = phi i64* [ %incdec.ptr19, %while.end20 ], [ %9, %if.end9 ]
83 %10 = bitcast i64* %wideptr.0.lcssa48 to i8*
84 br label %while.body29
; Tail loop: store one fill byte per iteration, advancing the pointer and
; decrementing the count, until the count reaches 0.
86 while.body29: ; preds = %while.body29, %while.body29.lr.ph
87 %len.addr.235 = phi i64 [ %len.addr.1.lcssa49, %while.body29.lr.ph ], [ %dec26, %while.body29 ]
88 %ptr.134 = phi i8* [ %10, %while.body29.lr.ph ], [ %incdec.ptr31, %while.body29 ]
89 %dec26 = add i64 %len.addr.235, -1
90 %incdec.ptr31 = getelementptr inbounds i8, i8* %ptr.134, i64 1
91 store i8 %conv, i8* %ptr.134, align 1, !tbaa !0
92 %cmp27 = icmp eq i64 %dec26, 0
93 br i1 %cmp27, label %done, label %while.body29
; Common exit block for all paths.
; NOTE(review): its terminator is not visible in this excerpt.
95 done: ; preds = %while.cond, %while.body29, %while.end20, %entry
99 !0 = !{!"omnipotent char", !1}
100 !1 = !{!"Simple C/C++ TBAA"}
101 !2 = !{!"long long", !0}