1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck %s -check-prefixes=ALL,LZCNT
3 ; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=corei7 < %s -S | FileCheck %s -check-prefixes=ALL,NOLZCNT
5 ; Recognize CTLZ builtin pattern.
6 ; Here we'll just convert loop to countable,
7 ; so do not insert builtin if CPU do not support CTLZ
9 ; int ctlz_and_other(int n, char *a)
11 ; n = n >= 0 ? n : -n;
14 ; a[i] = (n0 & (1 << i)) ? 1 : 0;
20 define i32 @ctlz_and_other(i32 %n, i8* nocapture %a) {
21 ; LZCNT-LABEL: @ctlz_and_other(
23 ; LZCNT-NEXT: [[C:%.*]] = icmp sgt i32 [[N:%.*]], 0
24 ; LZCNT-NEXT: [[NEGN:%.*]] = sub nsw i32 0, [[N]]
25 ; LZCNT-NEXT: [[ABS_N:%.*]] = select i1 [[C]], i32 [[N]], i32 [[NEGN]]
26 ; LZCNT-NEXT: [[SHR8:%.*]] = lshr i32 [[ABS_N]], 1
27 ; LZCNT-NEXT: [[TOBOOL9:%.*]] = icmp eq i32 [[SHR8]], 0
28 ; LZCNT-NEXT: br i1 [[TOBOOL9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
29 ; LZCNT: while.body.preheader:
30 ; LZCNT-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[SHR8]], i1 true)
31 ; LZCNT-NEXT: [[TMP1:%.*]] = sub i32 32, [[TMP0]]
32 ; LZCNT-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
33 ; LZCNT-NEXT: br label [[WHILE_BODY:%.*]]
35 ; LZCNT-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
36 ; LZCNT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
37 ; LZCNT-NEXT: [[SHR11:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[SHR8]], [[WHILE_BODY_PREHEADER]] ]
38 ; LZCNT-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVARS_IV]] to i32
39 ; LZCNT-NEXT: [[SHL:%.*]] = shl i32 1, [[TMP3]]
40 ; LZCNT-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[ABS_N]]
41 ; LZCNT-NEXT: [[TOBOOL1:%.*]] = icmp ne i32 [[AND]], 0
42 ; LZCNT-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL1]] to i8
43 ; LZCNT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[INDVARS_IV]]
44 ; LZCNT-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX]], align 1
45 ; LZCNT-NEXT: [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 1
46 ; LZCNT-NEXT: [[SHR]] = ashr i32 [[SHR11]], 1
47 ; LZCNT-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
48 ; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
49 ; LZCNT-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
50 ; LZCNT: while.end.loopexit:
51 ; LZCNT-NEXT: [[INDVARS_IV_NEXT_LCSSA:%.*]] = phi i64 [ [[TMP2]], [[WHILE_BODY]] ]
52 ; LZCNT-NEXT: [[TMP4:%.*]] = trunc i64 [[INDVARS_IV_NEXT_LCSSA]] to i32
53 ; LZCNT-NEXT: br label [[WHILE_END]]
55 ; LZCNT-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP4]], [[WHILE_END_LOOPEXIT]] ]
56 ; LZCNT-NEXT: ret i32 [[I_0_LCSSA]]
58 ; NOLZCNT-LABEL: @ctlz_and_other(
59 ; NOLZCNT-NEXT: entry:
60 ; NOLZCNT-NEXT: [[C:%.*]] = icmp sgt i32 [[N:%.*]], 0
61 ; NOLZCNT-NEXT: [[NEGN:%.*]] = sub nsw i32 0, [[N]]
62 ; NOLZCNT-NEXT: [[ABS_N:%.*]] = select i1 [[C]], i32 [[N]], i32 [[NEGN]]
63 ; NOLZCNT-NEXT: [[SHR8:%.*]] = lshr i32 [[ABS_N]], 1
64 ; NOLZCNT-NEXT: [[TOBOOL9:%.*]] = icmp eq i32 [[SHR8]], 0
65 ; NOLZCNT-NEXT: br i1 [[TOBOOL9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
66 ; NOLZCNT: while.body.preheader:
67 ; NOLZCNT-NEXT: br label [[WHILE_BODY:%.*]]
68 ; NOLZCNT: while.body:
69 ; NOLZCNT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
70 ; NOLZCNT-NEXT: [[SHR11:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[SHR8]], [[WHILE_BODY_PREHEADER]] ]
71 ; NOLZCNT-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32
72 ; NOLZCNT-NEXT: [[SHL:%.*]] = shl i32 1, [[TMP0]]
73 ; NOLZCNT-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[ABS_N]]
74 ; NOLZCNT-NEXT: [[TOBOOL1:%.*]] = icmp ne i32 [[AND]], 0
75 ; NOLZCNT-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL1]] to i8
76 ; NOLZCNT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[INDVARS_IV]]
77 ; NOLZCNT-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX]], align 1
78 ; NOLZCNT-NEXT: [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 1
79 ; NOLZCNT-NEXT: [[SHR]] = ashr i32 [[SHR11]], 1
80 ; NOLZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0
81 ; NOLZCNT-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
82 ; NOLZCNT: while.end.loopexit:
83 ; NOLZCNT-NEXT: [[INDVARS_IV_NEXT_LCSSA:%.*]] = phi i64 [ [[INDVARS_IV_NEXT]], [[WHILE_BODY]] ]
84 ; NOLZCNT-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV_NEXT_LCSSA]] to i32
85 ; NOLZCNT-NEXT: br label [[WHILE_END]]
87 ; NOLZCNT-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP1]], [[WHILE_END_LOOPEXIT]] ]
88 ; NOLZCNT-NEXT: ret i32 [[I_0_LCSSA]]
91 %c = icmp sgt i32 %n, 0
92 %negn = sub nsw i32 0, %n
93 %abs_n = select i1 %c, i32 %n, i32 %negn
94 %shr8 = lshr i32 %abs_n, 1
95 %tobool9 = icmp eq i32 %shr8, 0
96 br i1 %tobool9, label %while.end, label %while.body.preheader
98 while.body.preheader: ; preds = %entry
101 while.body: ; preds = %while.body.preheader, %while.body
102 %indvars.iv = phi i64 [ %indvars.iv.next, %while.body ], [ 0, %while.body.preheader ]
103 %shr11 = phi i32 [ %shr, %while.body ], [ %shr8, %while.body.preheader ]
104 %0 = trunc i64 %indvars.iv to i32
106 %and = and i32 %shl, %abs_n
107 %tobool1 = icmp ne i32 %and, 0
108 %conv = zext i1 %tobool1 to i8
109 %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
110 store i8 %conv, i8* %arrayidx, align 1
111 %indvars.iv.next = add nuw i64 %indvars.iv, 1
112 %shr = ashr i32 %shr11, 1
113 %tobool = icmp eq i32 %shr, 0
114 br i1 %tobool, label %while.end.loopexit, label %while.body
116 while.end.loopexit: ; preds = %while.body
117 %1 = trunc i64 %indvars.iv.next to i32
120 while.end: ; preds = %while.end.loopexit, %entry
121 %i.0.lcssa = phi i32 [ 0, %entry ], [ %1, %while.end.loopexit ]
125 ; Recognize CTLZ builtin pattern.
126 ; Here it will replace the loop -
127 ; assume builtin is always profitable.
129 ; int ctlz_zero_check(int n)
131 ; n = n >= 0 ? n : -n;
140 define i32 @ctlz_zero_check(i32 %n) {
141 ; ALL-LABEL: @ctlz_zero_check(
143 ; ALL-NEXT: [[C:%.*]] = icmp sgt i32 [[N:%.*]], 0
144 ; ALL-NEXT: [[NEGN:%.*]] = sub nsw i32 0, [[N]]
145 ; ALL-NEXT: [[ABS_N:%.*]] = select i1 [[C]], i32 [[N]], i32 [[NEGN]]
146 ; ALL-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[ABS_N]], 0
147 ; ALL-NEXT: br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
148 ; ALL: while.body.preheader:
149 ; ALL-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[ABS_N]], i1 true)
150 ; ALL-NEXT: [[TMP1:%.*]] = sub i32 32, [[TMP0]]
151 ; ALL-NEXT: br label [[WHILE_BODY:%.*]]
153 ; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
154 ; ALL-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
155 ; ALL-NEXT: [[N_ADDR_05:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[ABS_N]], [[WHILE_BODY_PREHEADER]] ]
156 ; ALL-NEXT: [[SHR]] = ashr i32 [[N_ADDR_05]], 1
157 ; ALL-NEXT: [[INC]] = add nsw i32 [[I_06]], 1
158 ; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
159 ; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
160 ; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
161 ; ALL: while.end.loopexit:
162 ; ALL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY]] ]
163 ; ALL-NEXT: br label [[WHILE_END]]
165 ; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
166 ; ALL-NEXT: ret i32 [[I_0_LCSSA]]
169 %c = icmp sgt i32 %n, 0
170 %negn = sub nsw i32 0, %n
171 %abs_n = select i1 %c, i32 %n, i32 %negn
172 %tobool4 = icmp eq i32 %abs_n, 0
173 br i1 %tobool4, label %while.end, label %while.body.preheader
175 while.body.preheader: ; preds = %entry
178 while.body: ; preds = %while.body.preheader, %while.body
179 %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
180 %n.addr.05 = phi i32 [ %shr, %while.body ], [ %abs_n, %while.body.preheader ]
181 %shr = ashr i32 %n.addr.05, 1
182 %inc = add nsw i32 %i.06, 1
183 %tobool = icmp eq i32 %shr, 0
184 br i1 %tobool, label %while.end.loopexit, label %while.body
186 while.end.loopexit: ; preds = %while.body
189 while.end: ; preds = %while.end.loopexit, %entry
190 %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ]
194 ; Recognize CTLZ builtin pattern.
195 ; Here it will replace the loop -
196 ; assume builtin is always profitable.
198 ; int ctlz_zero_check_lshr(int n)
208 define i32 @ctlz_zero_check_lshr(i32 %n) {
209 ; ALL-LABEL: @ctlz_zero_check_lshr(
211 ; ALL-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[N:%.*]], 0
212 ; ALL-NEXT: br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
213 ; ALL: while.body.preheader:
214 ; ALL-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[N]], i1 true)
215 ; ALL-NEXT: [[TMP1:%.*]] = sub i32 32, [[TMP0]]
216 ; ALL-NEXT: br label [[WHILE_BODY:%.*]]
218 ; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
219 ; ALL-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
220 ; ALL-NEXT: [[N_ADDR_05:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_BODY_PREHEADER]] ]
221 ; ALL-NEXT: [[SHR]] = lshr i32 [[N_ADDR_05]], 1
222 ; ALL-NEXT: [[INC]] = add nsw i32 [[I_06]], 1
223 ; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
224 ; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
225 ; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
226 ; ALL: while.end.loopexit:
227 ; ALL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY]] ]
228 ; ALL-NEXT: br label [[WHILE_END]]
230 ; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
231 ; ALL-NEXT: ret i32 [[I_0_LCSSA]]
234 %tobool4 = icmp eq i32 %n, 0
235 br i1 %tobool4, label %while.end, label %while.body.preheader
237 while.body.preheader: ; preds = %entry
240 while.body: ; preds = %while.body.preheader, %while.body
241 %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
242 %n.addr.05 = phi i32 [ %shr, %while.body ], [ %n, %while.body.preheader ]
243 %shr = lshr i32 %n.addr.05, 1
244 %inc = add nsw i32 %i.06, 1
245 %tobool = icmp eq i32 %shr, 0
246 br i1 %tobool, label %while.end.loopexit, label %while.body
248 while.end.loopexit: ; preds = %while.body
251 while.end: ; preds = %while.end.loopexit, %entry
252 %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ]
256 ; Recognize CTLZ builtin pattern.
257 ; Here it will replace the loop -
258 ; assume builtin is always profitable.
262 ; n = n >= 0 ? n : -n;
270 define i32 @ctlz(i32 %n) {
273 ; ALL-NEXT: [[C:%.*]] = icmp sgt i32 [[N:%.*]], 0
274 ; ALL-NEXT: [[NEGN:%.*]] = sub nsw i32 0, [[N]]
275 ; ALL-NEXT: [[ABS_N:%.*]] = select i1 [[C]], i32 [[N]], i32 [[NEGN]]
276 ; ALL-NEXT: [[TMP0:%.*]] = ashr i32 [[ABS_N]], 1
277 ; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
278 ; ALL-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
279 ; ALL-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
280 ; ALL-NEXT: br label [[WHILE_COND:%.*]]
282 ; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
283 ; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[ABS_N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
284 ; ALL-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
285 ; ALL-NEXT: [[SHR]] = ashr i32 [[N_ADDR_0]], 1
286 ; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
287 ; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
288 ; ALL-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
289 ; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
291 ; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[WHILE_COND]] ]
292 ; ALL-NEXT: ret i32 [[I_0_LCSSA]]
295 %c = icmp sgt i32 %n, 0
296 %negn = sub nsw i32 0, %n
297 %abs_n = select i1 %c, i32 %n, i32 %negn
300 while.cond: ; preds = %while.cond, %entry
301 %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
302 %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
303 %shr = ashr i32 %n.addr.0, 1
304 %tobool = icmp eq i32 %shr, 0
305 %inc = add nsw i32 %i.0, 1
306 br i1 %tobool, label %while.end, label %while.cond
308 while.end: ; preds = %while.cond
312 ; Recognize CTLZ builtin pattern.
313 ; Here it will replace the loop -
314 ; assume builtin is always profitable.
316 ; int ctlz_lshr(int n)
325 define i32 @ctlz_lshr(i32 %n) {
326 ; ALL-LABEL: @ctlz_lshr(
328 ; ALL-NEXT: [[TMP0:%.*]] = lshr i32 [[N:%.*]], 1
329 ; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
330 ; ALL-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
331 ; ALL-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
332 ; ALL-NEXT: br label [[WHILE_COND:%.*]]
334 ; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
335 ; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
336 ; ALL-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
337 ; ALL-NEXT: [[SHR]] = lshr i32 [[N_ADDR_0]], 1
338 ; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
339 ; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
340 ; ALL-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
341 ; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
343 ; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[WHILE_COND]] ]
344 ; ALL-NEXT: ret i32 [[I_0_LCSSA]]
349 while.cond: ; preds = %while.cond, %entry
350 %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
351 %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
352 %shr = lshr i32 %n.addr.0, 1
353 %tobool = icmp eq i32 %shr, 0
354 %inc = add nsw i32 %i.0, 1
355 br i1 %tobool, label %while.end, label %while.cond
357 while.end: ; preds = %while.cond
361 ; Recognize CTLZ builtin pattern.
362 ; Here it will replace the loop -
363 ; assume builtin is always profitable.
365 ; int ctlz_add(int n, int i0)
367 ; n = n >= 0 ? n : -n;
375 define i32 @ctlz_add(i32 %n, i32 %i0) {
376 ; ALL-LABEL: @ctlz_add(
378 ; ALL-NEXT: [[C:%.*]] = icmp sgt i32 [[N:%.*]], 0
379 ; ALL-NEXT: [[NEGN:%.*]] = sub nsw i32 0, [[N]]
380 ; ALL-NEXT: [[ABS_N:%.*]] = select i1 [[C]], i32 [[N]], i32 [[NEGN]]
381 ; ALL-NEXT: [[TMP0:%.*]] = ashr i32 [[ABS_N]], 1
382 ; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
383 ; ALL-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
384 ; ALL-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
385 ; ALL-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[I0:%.*]]
386 ; ALL-NEXT: br label [[WHILE_COND:%.*]]
388 ; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
389 ; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[ABS_N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
390 ; ALL-NEXT: [[I_0:%.*]] = phi i32 [ [[I0]], [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
391 ; ALL-NEXT: [[SHR]] = ashr i32 [[N_ADDR_0]], 1
392 ; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
393 ; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
394 ; ALL-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
395 ; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
397 ; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_COND]] ]
398 ; ALL-NEXT: ret i32 [[I_0_LCSSA]]
401 %c = icmp sgt i32 %n, 0
402 %negn = sub nsw i32 0, %n
403 %abs_n = select i1 %c, i32 %n, i32 %negn
406 while.cond: ; preds = %while.cond, %entry
407 %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
408 %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
409 %shr = ashr i32 %n.addr.0, 1
410 %tobool = icmp eq i32 %shr, 0
411 %inc = add nsw i32 %i.0, 1
412 br i1 %tobool, label %while.end, label %while.cond
414 while.end: ; preds = %while.cond
418 ; Recognize CTLZ builtin pattern.
419 ; Here it will replace the loop -
420 ; assume builtin is always profitable.
422 ; int ctlz_add_lshr(int n, int i0)
431 define i32 @ctlz_add_lshr(i32 %n, i32 %i0) {
432 ; ALL-LABEL: @ctlz_add_lshr(
434 ; ALL-NEXT: [[TMP0:%.*]] = lshr i32 [[N:%.*]], 1
435 ; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
436 ; ALL-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
437 ; ALL-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
438 ; ALL-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[I0:%.*]]
439 ; ALL-NEXT: br label [[WHILE_COND:%.*]]
441 ; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
442 ; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
443 ; ALL-NEXT: [[I_0:%.*]] = phi i32 [ [[I0]], [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
444 ; ALL-NEXT: [[SHR]] = lshr i32 [[N_ADDR_0]], 1
445 ; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
446 ; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
447 ; ALL-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
448 ; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
450 ; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_COND]] ]
451 ; ALL-NEXT: ret i32 [[I_0_LCSSA]]
456 while.cond: ; preds = %while.cond, %entry
457 %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
458 %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
459 %shr = lshr i32 %n.addr.0, 1
460 %tobool = icmp eq i32 %shr, 0
461 %inc = add nsw i32 %i.0, 1
462 br i1 %tobool, label %while.end, label %while.cond
464 while.end: ; preds = %while.cond
468 ; Recognize CTLZ builtin pattern.
469 ; Here it will replace the loop -
470 ; assume builtin is always profitable.
472 ; int ctlz_sext(short in)
484 define i32 @ctlz_sext(i16 %in) {
485 ; ALL-LABEL: @ctlz_sext(
487 ; ALL-NEXT: [[N:%.*]] = sext i16 [[IN:%.*]] to i32
488 ; ALL-NEXT: [[C:%.*]] = icmp sgt i16 [[IN]], 0
489 ; ALL-NEXT: [[NEGN:%.*]] = sub nsw i32 0, [[N]]
490 ; ALL-NEXT: [[ABS_N:%.*]] = select i1 [[C]], i32 [[N]], i32 [[NEGN]]
491 ; ALL-NEXT: [[TMP0:%.*]] = ashr i32 [[ABS_N]], 1
492 ; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
493 ; ALL-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
494 ; ALL-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
495 ; ALL-NEXT: br label [[WHILE_COND:%.*]]
497 ; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
498 ; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[ABS_N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
499 ; ALL-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
500 ; ALL-NEXT: [[SHR]] = ashr i32 [[N_ADDR_0]], 1
501 ; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
502 ; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
503 ; ALL-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
504 ; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
506 ; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[WHILE_COND]] ]
507 ; ALL-NEXT: ret i32 [[I_0_LCSSA]]
510 %n = sext i16 %in to i32
511 %c = icmp sgt i16 %in, 0
512 %negn = sub nsw i32 0, %n
513 %abs_n = select i1 %c, i32 %n, i32 %negn
516 while.cond: ; preds = %while.cond, %entry
517 %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
518 %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
519 %shr = ashr i32 %n.addr.0, 1
520 %tobool = icmp eq i32 %shr, 0
521 %inc = add nsw i32 %i.0, 1
522 br i1 %tobool, label %while.end, label %while.cond
524 while.end: ; preds = %while.cond
528 ; Recognize CTLZ builtin pattern.
529 ; Here it will replace the loop -
530 ; assume builtin is always profitable.
532 ; int ctlz_sext_lshr(short in)
541 define i32 @ctlz_sext_lshr(i16 %in) {
542 ; ALL-LABEL: @ctlz_sext_lshr(
544 ; ALL-NEXT: [[N:%.*]] = sext i16 [[IN:%.*]] to i32
545 ; ALL-NEXT: [[TMP0:%.*]] = lshr i32 [[N]], 1
546 ; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
547 ; ALL-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
548 ; ALL-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
549 ; ALL-NEXT: br label [[WHILE_COND:%.*]]
551 ; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
552 ; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
553 ; ALL-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
554 ; ALL-NEXT: [[SHR]] = lshr i32 [[N_ADDR_0]], 1
555 ; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
556 ; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
557 ; ALL-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
558 ; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
560 ; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[WHILE_COND]] ]
561 ; ALL-NEXT: ret i32 [[I_0_LCSSA]]
564 %n = sext i16 %in to i32
567 while.cond: ; preds = %while.cond, %entry
568 %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
569 %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
570 %shr = lshr i32 %n.addr.0, 1
571 %tobool = icmp eq i32 %shr, 0
572 %inc = add nsw i32 %i.0, 1
573 br i1 %tobool, label %while.end, label %while.cond
575 while.end: ; preds = %while.cond
579 ; This loop contains a volatile store. If x is initially negative,
580 ; the code will be an infinite loop because the ashr will eventually produce
581 ; all ones and continue doing so. This prevents the loop from terminating. If
582 ; we convert this to a countable loop using ctlz that loop will only run 32
583 ; times. This is different than the infinite number of times of the original.
584 define i32 @foo(i32 %x) {
587 ; ALL-NEXT: [[V:%.*]] = alloca i8, align 1
588 ; ALL-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[X:%.*]], 0
589 ; ALL-NEXT: br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_LR_PH:%.*]]
590 ; ALL: while.body.lr.ph:
591 ; ALL-NEXT: br label [[WHILE_BODY:%.*]]
593 ; ALL-NEXT: [[CNT_06:%.*]] = phi i32 [ 0, [[WHILE_BODY_LR_PH]] ], [ [[INC:%.*]], [[WHILE_BODY]] ]
594 ; ALL-NEXT: [[X_ADDR_05:%.*]] = phi i32 [ [[X]], [[WHILE_BODY_LR_PH]] ], [ [[SHR:%.*]], [[WHILE_BODY]] ]
595 ; ALL-NEXT: [[SHR]] = ashr i32 [[X_ADDR_05]], 1
596 ; ALL-NEXT: [[INC]] = add i32 [[CNT_06]], 1
597 ; ALL-NEXT: store volatile i8 42, i8* [[V]], align 1
598 ; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0
599 ; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_COND_WHILE_END_CRIT_EDGE:%.*]], label [[WHILE_BODY]]
600 ; ALL: while.cond.while.end_crit_edge:
601 ; ALL-NEXT: [[SPLIT:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ]
602 ; ALL-NEXT: br label [[WHILE_END]]
604 ; ALL-NEXT: [[CNT_0_LCSSA:%.*]] = phi i32 [ [[SPLIT]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
605 ; ALL-NEXT: ret i32 [[CNT_0_LCSSA]]
608 %v = alloca i8, align 1
609 %tobool4 = icmp eq i32 %x, 0
610 br i1 %tobool4, label %while.end, label %while.body.lr.ph
612 while.body.lr.ph: ; preds = %entry
615 while.body: ; preds = %while.body.lr.ph, %while.body
616 %cnt.06 = phi i32 [ 0, %while.body.lr.ph ], [ %inc, %while.body ]
617 %x.addr.05 = phi i32 [ %x, %while.body.lr.ph ], [ %shr, %while.body ]
618 %shr = ashr i32 %x.addr.05, 1
619 %inc = add i32 %cnt.06, 1
620 store volatile i8 42, i8* %v, align 1
621 %tobool = icmp eq i32 %shr, 0
622 br i1 %tobool, label %while.cond.while.end_crit_edge, label %while.body
624 while.cond.while.end_crit_edge: ; preds = %while.body
625 %split = phi i32 [ %inc, %while.body ]
628 while.end: ; preds = %while.cond.while.end_crit_edge, %entry
629 %cnt.0.lcssa = phi i32 [ %split, %while.cond.while.end_crit_edge ], [ 0, %entry ]
633 ; We can't easily transform this loop. It returns 1 for an input of both
636 ; int ctlz_bad(unsigned n)
646 define i32 @ctlz_bad(i32 %n) {
647 ; ALL-LABEL: @ctlz_bad(
649 ; ALL-NEXT: br label [[WHILE_COND:%.*]]
651 ; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
652 ; ALL-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
653 ; ALL-NEXT: [[SHR]] = lshr i32 [[N_ADDR_0]], 1
654 ; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0
655 ; ALL-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
656 ; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
658 ; ALL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_COND]] ]
659 ; ALL-NEXT: ret i32 [[INC_LCSSA]]
664 while.cond: ; preds = %while.cond, %entry
665 %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
666 %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
667 %shr = lshr i32 %n.addr.0, 1
668 %tobool = icmp eq i32 %shr, 0
669 %inc = add nsw i32 %i.0, 1
670 br i1 %tobool, label %while.end, label %while.cond
672 while.end: ; preds = %while.cond
676 ; Recognize CTLZ builtin pattern.
677 ; Here it will replace the loop -
678 ; assume builtin is always profitable.
680 ; int ctlz_decrement(int n)
690 define i32 @ctlz_decrement(i32 %n) {
691 ; ALL-LABEL: @ctlz_decrement(
693 ; ALL-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[N:%.*]], 0
694 ; ALL-NEXT: br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
695 ; ALL: while.body.preheader:
696 ; ALL-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[N]], i1 true)
697 ; ALL-NEXT: [[TMP1:%.*]] = sub i32 32, [[TMP0]]
698 ; ALL-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
699 ; ALL-NEXT: br label [[WHILE_BODY:%.*]]
701 ; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
702 ; ALL-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 32, [[WHILE_BODY_PREHEADER]] ]
703 ; ALL-NEXT: [[N_ADDR_05:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_BODY_PREHEADER]] ]
704 ; ALL-NEXT: [[SHR]] = lshr i32 [[N_ADDR_05]], 1
705 ; ALL-NEXT: [[INC]] = add nsw i32 [[I_06]], -1
706 ; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
707 ; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
708 ; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
709 ; ALL: while.end.loopexit:
710 ; ALL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[WHILE_BODY]] ]
711 ; ALL-NEXT: br label [[WHILE_END]]
713 ; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 32, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
714 ; ALL-NEXT: ret i32 [[I_0_LCSSA]]
717 %tobool4 = icmp eq i32 %n, 0
718 br i1 %tobool4, label %while.end, label %while.body.preheader
720 while.body.preheader: ; preds = %entry
723 while.body: ; preds = %while.body.preheader, %while.body
724 %i.06 = phi i32 [ %inc, %while.body ], [ 32, %while.body.preheader ]
725 %n.addr.05 = phi i32 [ %shr, %while.body ], [ %n, %while.body.preheader ]
726 %shr = lshr i32 %n.addr.05, 1
727 %inc = add nsw i32 %i.06, -1
728 %tobool = icmp eq i32 %shr, 0
729 br i1 %tobool, label %while.end.loopexit, label %while.body
731 while.end.loopexit: ; preds = %while.body
734 while.end: ; preds = %while.end.loopexit, %entry
735 %i.0.lcssa = phi i32 [ 32, %entry ], [ %inc, %while.end.loopexit ]
739 ; Recognize CTLZ builtin pattern.
740 ; Here it will replace the loop -
741 ; assume builtin is always profitable.
743 ; int ctlz_lshr_decrement(int n)
752 define i32 @ctlz_lshr_decrement(i32 %n) {
753 ; ALL-LABEL: @ctlz_lshr_decrement(
755 ; ALL-NEXT: [[TMP0:%.*]] = lshr i32 [[N:%.*]], 1
756 ; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
757 ; ALL-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
758 ; ALL-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
759 ; ALL-NEXT: [[TMP4:%.*]] = sub i32 31, [[TMP2]]
760 ; ALL-NEXT: br label [[WHILE_COND:%.*]]
762 ; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
763 ; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
764 ; ALL-NEXT: [[I_0:%.*]] = phi i32 [ 31, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
765 ; ALL-NEXT: [[SHR]] = lshr i32 [[N_ADDR_0]], 1
766 ; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
767 ; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
768 ; ALL-NEXT: [[INC]] = add nsw i32 [[I_0]], -1
769 ; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
771 ; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_COND]] ]
772 ; ALL-NEXT: ret i32 [[I_0_LCSSA]]
777 while.cond: ; preds = %while.cond, %entry
778 %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
779 %i.0 = phi i32 [ 31, %entry ], [ %inc, %while.cond ]
780 %shr = lshr i32 %n.addr.0, 1
781 %tobool = icmp eq i32 %shr, 0
782 %inc = add nsw i32 %i.0, -1
783 br i1 %tobool, label %while.end, label %while.cond
785 while.end: ; preds = %while.cond