1 ; RUN: opt -passes=loop-idiom -mtriple=armv7a < %s -S | FileCheck -check-prefix=LZCNT --check-prefix=ALL %s
2 ; RUN: opt -passes=loop-idiom -mtriple=armv4t < %s -S | FileCheck -check-prefix=NOLZCNT --check-prefix=ALL %s
4 ; Recognize CTLZ builtin pattern.
5 ; Here we'll just convert the loop to a countable one,
6 ; so do not insert the builtin if the CPU does not support CTLZ.
8 ; int ctlz_and_other(int n, char *a)
10 ; n = n >= 0 ? n : -n;
13 ; a[i] = (n0 & (1 << i)) ? 1 : 0;
20 ; LZCNT: %0 = call i32 @llvm.ctlz.i32(i32 %shr8, i1 true)
21 ; LZCNT-NEXT: %1 = sub i32 32, %0
22 ; LZCNT-NEXT: %2 = zext i32 %1 to i64
23 ; LZCNT: %indvars.iv.next.lcssa = phi i64 [ %2, %while.body ]
24 ; LZCNT: %4 = trunc i64 %indvars.iv.next.lcssa to i32
25 ; LZCNT: %i.0.lcssa = phi i32 [ 0, %entry ], [ %4, %while.end.loopexit ]
26 ; LZCNT: ret i32 %i.0.lcssa
29 ; NOLZCNT-NOT: @llvm.ctlz
31 ; Function Attrs: norecurse nounwind uwtable
; Test input: a shift-right-until-zero loop over |%n| that, on every
; iteration, also stores one byte through %a. Returns the iteration count
; (0 when the loop is skipped).
32 define i32 @ctlz_and_other(i32 %n, ptr nocapture %a) {
; %abs_n = |%n|; the trailing `i1 true` is llvm.abs's is_int_min_poison flag.
34 %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
; First shift happens before the loop; skip the loop when it already gives 0.
35 %shr8 = lshr i32 %abs_n, 1
36 %tobool9 = icmp eq i32 %shr8, 0
37 br i1 %tobool9, label %while.end, label %while.body.preheader
39 while.body.preheader: ; preds = %entry
42 while.body: ; preds = %while.body.preheader, %while.body
; %indvars.iv: 64-bit counter starting at 0. %shr11: value being shifted,
; starting at %shr8.
43 %indvars.iv = phi i64 [ %indvars.iv.next, %while.body ], [ 0, %while.body.preheader ]
44 %shr11 = phi i32 [ %shr, %while.body ], [ %shr8, %while.body.preheader ]
45 %0 = trunc i64 %indvars.iv to i32
; Store 1 to a[%indvars.iv] when (%shl & %abs_n) is non-zero, otherwise 0.
47 %and = and i32 %shl, %abs_n
48 %tobool1 = icmp ne i32 %and, 0
49 %conv = zext i1 %tobool1 to i8
50 %arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
51 store i8 %conv, ptr %arrayidx, align 1
; Advance the counter, shift once more, and leave the loop when the shifted
; value reaches zero.
52 %indvars.iv.next = add nuw i64 %indvars.iv, 1
53 %shr = ashr i32 %shr11, 1
54 %tobool = icmp eq i32 %shr, 0
55 br i1 %tobool, label %while.end.loopexit, label %while.body
57 while.end.loopexit: ; preds = %while.body
; Narrow the final counter value back to the i32 result type.
58 %1 = trunc i64 %indvars.iv.next to i32
61 while.end: ; preds = %while.end.loopexit, %entry
; Result: 0 if the loop was skipped, otherwise the truncated final counter.
62 %i.0.lcssa = phi i32 [ 0, %entry ], [ %1, %while.end.loopexit ]
66 ; Recognize CTLZ builtin pattern.
67 ; Here the loop itself is replaced:
68 ; the builtin is assumed to be always profitable.
70 ; int ctlz_zero_check(int n)
72 ; n = n >= 0 ? n : -n;
82 ; ALL: %0 = call i32 @llvm.ctlz.i32(i32 %abs_n, i1 true)
83 ; ALL-NEXT: %1 = sub i32 32, %0
84 ; ALL: %inc.lcssa = phi i32 [ %1, %while.body ]
85 ; ALL: %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
86 ; ALL: ret i32 %i.0.lcssa
88 ; Function Attrs: norecurse nounwind readnone uwtable
; Test input: a counting shift-right-until-zero loop over |%n| that is guarded
; by an explicit zero check on |%n| before the loop is entered.
89 define i32 @ctlz_zero_check(i32 %n) {
; %abs_n = |%n|; the trailing `i1 true` is llvm.abs's is_int_min_poison flag.
91 %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
; Guard: bypass the loop entirely when |%n| is zero.
92 %tobool4 = icmp eq i32 %abs_n, 0
93 br i1 %tobool4, label %while.end, label %while.body.preheader
95 while.body.preheader: ; preds = %entry
98 while.body: ; preds = %while.body.preheader, %while.body
; %i.06: iteration counter starting at 0. %n.addr.05: value being shifted,
; starting at %abs_n.
99 %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
100 %n.addr.05 = phi i32 [ %shr, %while.body ], [ %abs_n, %while.body.preheader ]
; Shift right by one, bump the counter, and exit once the value is zero.
101 %shr = ashr i32 %n.addr.05, 1
102 %inc = add nsw i32 %i.06, 1
103 %tobool = icmp eq i32 %shr, 0
104 br i1 %tobool, label %while.end.loopexit, label %while.body
106 while.end.loopexit: ; preds = %while.body
109 while.end: ; preds = %while.end.loopexit, %entry
; Result: 0 if the guard skipped the loop, otherwise the final counter value.
110 %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ]
114 ; Recognize CTLZ builtin pattern.
115 ; Here the loop itself is replaced:
116 ; the builtin is assumed to be always profitable.
120 ; n = n >= 0 ? n : -n;
129 ; ALL: %0 = ashr i32 %abs_n, 1
130 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
131 ; ALL-NEXT: %2 = sub i32 32, %1
132 ; ALL-NEXT: %3 = add i32 %2, 1
133 ; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ]
134 ; ALL: ret i32 %i.0.lcssa
136 ; Function Attrs: norecurse nounwind readnone uwtable
; Test input: a single-block counting loop that shifts |%n| right by one per
; iteration until the shifted value is zero. The phis take their initial
; values directly from %entry.
137 define i32 @ctlz(i32 %n) {
; %abs_n = |%n|; the trailing `i1 true` is llvm.abs's is_int_min_poison flag.
139 %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
142 while.cond: ; preds = %while.cond, %entry
; %n.addr.0: value being shifted, starting at %abs_n. %i.0: counter from 0.
143 %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
144 %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
; Shift, test for zero, bump the counter; loop back while %shr is non-zero.
145 %shr = ashr i32 %n.addr.0, 1
146 %tobool = icmp eq i32 %shr, 0
147 %inc = add nsw i32 %i.0, 1
148 br i1 %tobool, label %while.end, label %while.cond
150 while.end: ; preds = %while.cond
154 ; Recognize CTLZ builtin pattern.
155 ; Here the loop itself is replaced:
156 ; the builtin is assumed to be always profitable.
158 ; int ctlz_add(int n, int i0)
160 ; n = n >= 0 ? n : -n;
169 ; ALL: %0 = ashr i32 %abs_n, 1
170 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
171 ; ALL-NEXT: %2 = sub i32 32, %1
172 ; ALL-NEXT: %3 = add i32 %2, 1
173 ; ALL-NEXT: %4 = add i32 %2, %i0
174 ; ALL: %i.0.lcssa = phi i32 [ %4, %while.cond ]
175 ; ALL: ret i32 %i.0.lcssa
177 ; Function Attrs: norecurse nounwind readnone uwtable
; Test input: the same single-block shift-right-until-zero loop over |%n|,
; except that the counter starts at the caller-supplied %i0 instead of 0.
178 define i32 @ctlz_add(i32 %n, i32 %i0) {
; %abs_n = |%n|; the trailing `i1 true` is llvm.abs's is_int_min_poison flag.
180 %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
183 while.cond: ; preds = %while.cond, %entry
; %n.addr.0: value being shifted, starting at %abs_n. %i.0: counter from %i0.
184 %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
185 %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
; Shift, test for zero, bump the counter; loop back while %shr is non-zero.
186 %shr = ashr i32 %n.addr.0, 1
187 %tobool = icmp eq i32 %shr, 0
188 %inc = add nsw i32 %i.0, 1
189 br i1 %tobool, label %while.end, label %while.cond
191 while.end: ; preds = %while.cond
195 ; Recognize CTLZ builtin pattern.
196 ; Here the loop itself is replaced:
197 ; the builtin is assumed to be always profitable.
199 ; int ctlz_sext(short in)
212 ; ALL: %0 = ashr i32 %abs_n, 1
213 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
214 ; ALL-NEXT: %2 = sub i32 32, %1
215 ; ALL-NEXT: %3 = add i32 %2, 1
216 ; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ]
217 ; ALL: ret i32 %i.0.lcssa
219 ; Function Attrs: norecurse nounwind readnone uwtable
; Test input: the single-block shift-right-until-zero counting loop, where the
; shifted value comes from a 16-bit argument widened to i32.
220 define i32 @ctlz_sext(i16 %in) {
; Absolute value is taken at i16 width (is_int_min_poison = false) and then
; widened.
; NOTE(review): despite the function name, the widening is a zext of the
; i16 absolute value, not a sext.
222 %abs = call i16 @llvm.abs.i16(i16 %in, i1 false)
223 %abs_n = zext i16 %abs to i32
226 while.cond: ; preds = %while.cond, %entry
; %n.addr.0: value being shifted, starting at %abs_n. %i.0: counter from 0.
227 %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
228 %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
; Shift, test for zero, bump the counter; loop back while %shr is non-zero.
229 %shr = ashr i32 %n.addr.0, 1
230 %tobool = icmp eq i32 %shr, 0
231 %inc = add nsw i32 %i.0, 1
232 br i1 %tobool, label %while.end, label %while.cond
234 while.end: ; preds = %while.cond
; Declarations of the llvm.abs intrinsics called by the test functions:
; the i32 form and the i16 form (the latter used by @ctlz_sext).
238 declare i32 @llvm.abs.i32(i32, i1)
239 declare i16 @llvm.abs.i16(i16, i1)