1 ; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck -check-prefix=LZCNT --check-prefix=ALL %s
2 ; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=corei7 < %s -S | FileCheck -check-prefix=NOLZCNT --check-prefix=ALL %s
4 ; Recognize CTLZ builtin pattern.
5 ; Here we'll just convert loop to countable,
6 ; so do not insert builtin if CPU do not support CTLZ
8 ; int ctlz_and_other(int n, char *a)
10 ; n = n >= 0 ? n : -n;
13 ; a[i] = (n0 & (1 << i)) ? 1 : 0;
20 ; LZCNT: %0 = call i32 @llvm.ctlz.i32(i32 %shr8, i1 true)
21 ; LZCNT-NEXT: %1 = sub i32 32, %0
22 ; LZCNT-NEXT: %2 = zext i32 %1 to i64
23 ; LZCNT: %indvars.iv.next.lcssa = phi i64 [ %2, %while.body ]
24 ; LZCNT: %4 = trunc i64 %indvars.iv.next.lcssa to i32
25 ; LZCNT: %i.0.lcssa = phi i32 [ 0, %entry ], [ %4, %while.end.loopexit ]
26 ; LZCNT: ret i32 %i.0.lcssa
29 ; NOLZCNT-NOT: @llvm.ctlz
31 ; Function Attrs: norecurse nounwind uwtable
32 define i32 @ctlz_and_other(i32 %n, i8* nocapture %a) {
34 %c = icmp sgt i32 %n, 0
35 %negn = sub nsw i32 0, %n
36 %abs_n = select i1 %c, i32 %n, i32 %negn
37 %shr8 = lshr i32 %abs_n, 1
38 %tobool9 = icmp eq i32 %shr8, 0
39 br i1 %tobool9, label %while.end, label %while.body.preheader
41 while.body.preheader: ; preds = %entry
44 while.body: ; preds = %while.body.preheader, %while.body
45 %indvars.iv = phi i64 [ %indvars.iv.next, %while.body ], [ 0, %while.body.preheader ]
46 %shr11 = phi i32 [ %shr, %while.body ], [ %shr8, %while.body.preheader ]
47 %0 = trunc i64 %indvars.iv to i32
49 %and = and i32 %shl, %abs_n
50 %tobool1 = icmp ne i32 %and, 0
51 %conv = zext i1 %tobool1 to i8
52 %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
53 store i8 %conv, i8* %arrayidx, align 1
54 %indvars.iv.next = add nuw i64 %indvars.iv, 1
55 %shr = ashr i32 %shr11, 1
56 %tobool = icmp eq i32 %shr, 0
57 br i1 %tobool, label %while.end.loopexit, label %while.body
59 while.end.loopexit: ; preds = %while.body
60 %1 = trunc i64 %indvars.iv.next to i32
63 while.end: ; preds = %while.end.loopexit, %entry
64 %i.0.lcssa = phi i32 [ 0, %entry ], [ %1, %while.end.loopexit ]
68 ; Recognize CTLZ builtin pattern.
69 ; Here it will replace the loop -
70 ; assume builtin is always profitable.
72 ; int ctlz_zero_check(int n)
74 ; n = n >= 0 ? n : -n;
84 ; ALL: %0 = call i32 @llvm.ctlz.i32(i32 %abs_n, i1 true)
85 ; ALL-NEXT: %1 = sub i32 32, %0
86 ; ALL: %inc.lcssa = phi i32 [ %1, %while.body ]
87 ; ALL: %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
88 ; ALL: ret i32 %i.0.lcssa
90 ; Function Attrs: norecurse nounwind readnone uwtable
91 define i32 @ctlz_zero_check(i32 %n) {
93 %c = icmp sgt i32 %n, 0
94 %negn = sub nsw i32 0, %n
95 %abs_n = select i1 %c, i32 %n, i32 %negn
96 %tobool4 = icmp eq i32 %abs_n, 0
97 br i1 %tobool4, label %while.end, label %while.body.preheader
99 while.body.preheader: ; preds = %entry
102 while.body: ; preds = %while.body.preheader, %while.body
103 %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
104 %n.addr.05 = phi i32 [ %shr, %while.body ], [ %abs_n, %while.body.preheader ]
105 %shr = ashr i32 %n.addr.05, 1
106 %inc = add nsw i32 %i.06, 1
107 %tobool = icmp eq i32 %shr, 0
108 br i1 %tobool, label %while.end.loopexit, label %while.body
110 while.end.loopexit: ; preds = %while.body
113 while.end: ; preds = %while.end.loopexit, %entry
114 %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ]
118 ; Recognize CTLZ builtin pattern.
119 ; Here it will replace the loop -
120 ; assume builtin is always profitable.
122 ; int ctlz_zero_check_lshr(int n)
133 ; ALL: %0 = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
134 ; ALL-NEXT: %1 = sub i32 32, %0
135 ; ALL: %inc.lcssa = phi i32 [ %1, %while.body ]
136 ; ALL: %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
137 ; ALL: ret i32 %i.0.lcssa
139 ; Function Attrs: norecurse nounwind readnone uwtable
140 define i32 @ctlz_zero_check_lshr(i32 %n) {
142 %tobool4 = icmp eq i32 %n, 0
143 br i1 %tobool4, label %while.end, label %while.body.preheader
145 while.body.preheader: ; preds = %entry
148 while.body: ; preds = %while.body.preheader, %while.body
149 %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
150 %n.addr.05 = phi i32 [ %shr, %while.body ], [ %n, %while.body.preheader ]
151 %shr = lshr i32 %n.addr.05, 1
152 %inc = add nsw i32 %i.06, 1
153 %tobool = icmp eq i32 %shr, 0
154 br i1 %tobool, label %while.end.loopexit, label %while.body
156 while.end.loopexit: ; preds = %while.body
159 while.end: ; preds = %while.end.loopexit, %entry
160 %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ]
164 ; Recognize CTLZ builtin pattern.
165 ; Here it will replace the loop -
166 ; assume builtin is always profitable.
170 ; n = n >= 0 ? n : -n;
179 ; ALL: %0 = ashr i32 %abs_n, 1
180 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
181 ; ALL-NEXT: %2 = sub i32 32, %1
182 ; ALL-NEXT: %3 = add i32 %2, 1
183 ; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ]
184 ; ALL: ret i32 %i.0.lcssa
186 ; Function Attrs: norecurse nounwind readnone uwtable
187 define i32 @ctlz(i32 %n) {
189 %c = icmp sgt i32 %n, 0
190 %negn = sub nsw i32 0, %n
191 %abs_n = select i1 %c, i32 %n, i32 %negn
194 while.cond: ; preds = %while.cond, %entry
195 %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
196 %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
197 %shr = ashr i32 %n.addr.0, 1
198 %tobool = icmp eq i32 %shr, 0
199 %inc = add nsw i32 %i.0, 1
200 br i1 %tobool, label %while.end, label %while.cond
202 while.end: ; preds = %while.cond
206 ; Recognize CTLZ builtin pattern.
207 ; Here it will replace the loop -
208 ; assume builtin is always profitable.
210 ; int ctlz_lshr(int n)
220 ; ALL: %0 = lshr i32 %n, 1
221 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
222 ; ALL-NEXT: %2 = sub i32 32, %1
223 ; ALL-NEXT: %3 = add i32 %2, 1
224 ; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ]
225 ; ALL: ret i32 %i.0.lcssa
227 ; Function Attrs: norecurse nounwind readnone uwtable
228 define i32 @ctlz_lshr(i32 %n) {
232 while.cond: ; preds = %while.cond, %entry
233 %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
234 %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
235 %shr = lshr i32 %n.addr.0, 1
236 %tobool = icmp eq i32 %shr, 0
237 %inc = add nsw i32 %i.0, 1
238 br i1 %tobool, label %while.end, label %while.cond
240 while.end: ; preds = %while.cond
244 ; Recognize CTLZ builtin pattern.
245 ; Here it will replace the loop -
246 ; assume builtin is always profitable.
248 ; int ctlz_add(int n, int i0)
250 ; n = n >= 0 ? n : -n;
259 ; ALL: %0 = ashr i32 %abs_n, 1
260 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
261 ; ALL-NEXT: %2 = sub i32 32, %1
262 ; ALL-NEXT: %3 = add i32 %2, 1
263 ; ALL-NEXT: %4 = add i32 %2, %i0
264 ; ALL: %i.0.lcssa = phi i32 [ %4, %while.cond ]
265 ; ALL: ret i32 %i.0.lcssa
267 ; Function Attrs: norecurse nounwind readnone uwtable
268 define i32 @ctlz_add(i32 %n, i32 %i0) {
270 %c = icmp sgt i32 %n, 0
271 %negn = sub nsw i32 0, %n
272 %abs_n = select i1 %c, i32 %n, i32 %negn
275 while.cond: ; preds = %while.cond, %entry
276 %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
277 %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
278 %shr = ashr i32 %n.addr.0, 1
279 %tobool = icmp eq i32 %shr, 0
280 %inc = add nsw i32 %i.0, 1
281 br i1 %tobool, label %while.end, label %while.cond
283 while.end: ; preds = %while.cond
287 ; Recognize CTLZ builtin pattern.
288 ; Here it will replace the loop -
289 ; assume builtin is always profitable.
291 ; int ctlz_add_lshr(int n, int i0)
301 ; ALL: %0 = lshr i32 %n, 1
302 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
303 ; ALL-NEXT: %2 = sub i32 32, %1
304 ; ALL-NEXT: %3 = add i32 %2, 1
305 ; ALL-NEXT: %4 = add i32 %2, %i0
306 ; ALL: %i.0.lcssa = phi i32 [ %4, %while.cond ]
307 ; ALL: ret i32 %i.0.lcssa
309 ; Function Attrs: norecurse nounwind readnone uwtable
310 define i32 @ctlz_add_lshr(i32 %n, i32 %i0) {
314 while.cond: ; preds = %while.cond, %entry
315 %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
316 %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
317 %shr = lshr i32 %n.addr.0, 1
318 %tobool = icmp eq i32 %shr, 0
319 %inc = add nsw i32 %i.0, 1
320 br i1 %tobool, label %while.end, label %while.cond
322 while.end: ; preds = %while.cond
326 ; Recognize CTLZ builtin pattern.
327 ; Here it will replace the loop -
328 ; assume builtin is always profitable.
330 ; int ctlz_sext(short in)
343 ; ALL: %0 = ashr i32 %abs_n, 1
344 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
345 ; ALL-NEXT: %2 = sub i32 32, %1
346 ; ALL-NEXT: %3 = add i32 %2, 1
347 ; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ]
348 ; ALL: ret i32 %i.0.lcssa
350 ; Function Attrs: norecurse nounwind readnone uwtable
351 define i32 @ctlz_sext(i16 %in) {
353 %n = sext i16 %in to i32
354 %c = icmp sgt i16 %in, 0
355 %negn = sub nsw i32 0, %n
356 %abs_n = select i1 %c, i32 %n, i32 %negn
359 while.cond: ; preds = %while.cond, %entry
360 %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
361 %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
362 %shr = ashr i32 %n.addr.0, 1
363 %tobool = icmp eq i32 %shr, 0
364 %inc = add nsw i32 %i.0, 1
365 br i1 %tobool, label %while.end, label %while.cond
367 while.end: ; preds = %while.cond
371 ; Recognize CTLZ builtin pattern.
372 ; Here it will replace the loop -
373 ; assume builtin is always profitable.
375 ; int ctlz_sext_lshr(short in)
385 ; ALL: %0 = lshr i32 %n, 1
386 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
387 ; ALL-NEXT: %2 = sub i32 32, %1
388 ; ALL-NEXT: %3 = add i32 %2, 1
389 ; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ]
390 ; ALL: ret i32 %i.0.lcssa
392 ; Function Attrs: norecurse nounwind readnone uwtable
393 define i32 @ctlz_sext_lshr(i16 %in) {
395 %n = sext i16 %in to i32
398 while.cond: ; preds = %while.cond, %entry
399 %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
400 %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
401 %shr = lshr i32 %n.addr.0, 1
402 %tobool = icmp eq i32 %shr, 0
403 %inc = add nsw i32 %i.0, 1
404 br i1 %tobool, label %while.end, label %while.cond
406 while.end: ; preds = %while.cond
410 ; This loop contains a volatile store. If x is initially negative,
411 ; the code will be an infinite loop because the ashr will eventually produce
412 ; all ones and continue doing so. This prevents the loop from terminating. If
413 ; we convert this to a countable loop using ctlz that loop will only run 32
414 ; times. This is different than the infinite number of times of the original.
415 define i32 @foo(i32 %x) {
418 ; LZCNT-NEXT: [[V:%.*]] = alloca i8, align 1
419 ; LZCNT-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[X:%.*]], 0
420 ; LZCNT-NEXT: br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_LR_PH:%.*]]
421 ; LZCNT: while.body.lr.ph:
422 ; LZCNT-NEXT: br label [[WHILE_BODY:%.*]]
424 ; LZCNT-NEXT: [[CNT_06:%.*]] = phi i32 [ 0, [[WHILE_BODY_LR_PH]] ], [ [[INC:%.*]], [[WHILE_BODY]] ]
425 ; LZCNT-NEXT: [[X_ADDR_05:%.*]] = phi i32 [ [[X]], [[WHILE_BODY_LR_PH]] ], [ [[SHR:%.*]], [[WHILE_BODY]] ]
426 ; LZCNT-NEXT: [[SHR]] = ashr i32 [[X_ADDR_05]], 1
427 ; LZCNT-NEXT: [[INC]] = add i32 [[CNT_06]], 1
428 ; LZCNT-NEXT: store volatile i8 42, i8* [[V]], align 1
429 ; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0
430 ; LZCNT-NEXT: br i1 [[TOBOOL]], label [[WHILE_COND_WHILE_END_CRIT_EDGE:%.*]], label [[WHILE_BODY]]
431 ; LZCNT: while.cond.while.end_crit_edge:
432 ; LZCNT-NEXT: [[SPLIT:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ]
433 ; LZCNT-NEXT: br label [[WHILE_END]]
435 ; LZCNT-NEXT: [[CNT_0_LCSSA:%.*]] = phi i32 [ [[SPLIT]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
436 ; LZCNT-NEXT: ret i32 [[CNT_0_LCSSA]]
438 ; NOLZCNT-LABEL: @foo(
439 ; NOLZCNT-NEXT: entry:
440 ; NOLZCNT-NEXT: [[V:%.*]] = alloca i8, align 1
441 ; NOLZCNT-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[X:%.*]], 0
442 ; NOLZCNT-NEXT: br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_LR_PH:%.*]]
443 ; NOLZCNT: while.body.lr.ph:
444 ; NOLZCNT-NEXT: br label [[WHILE_BODY:%.*]]
445 ; NOLZCNT: while.body:
446 ; NOLZCNT-NEXT: [[CNT_06:%.*]] = phi i32 [ 0, [[WHILE_BODY_LR_PH]] ], [ [[INC:%.*]], [[WHILE_BODY]] ]
447 ; NOLZCNT-NEXT: [[X_ADDR_05:%.*]] = phi i32 [ [[X]], [[WHILE_BODY_LR_PH]] ], [ [[SHR:%.*]], [[WHILE_BODY]] ]
448 ; NOLZCNT-NEXT: [[SHR]] = ashr i32 [[X_ADDR_05]], 1
449 ; NOLZCNT-NEXT: [[INC]] = add i32 [[CNT_06]], 1
450 ; NOLZCNT-NEXT: store volatile i8 42, i8* [[V]], align 1
451 ; NOLZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0
452 ; NOLZCNT-NEXT: br i1 [[TOBOOL]], label [[WHILE_COND_WHILE_END_CRIT_EDGE:%.*]], label [[WHILE_BODY]]
453 ; NOLZCNT: while.cond.while.end_crit_edge:
454 ; NOLZCNT-NEXT: [[SPLIT:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ]
455 ; NOLZCNT-NEXT: br label [[WHILE_END]]
456 ; NOLZCNT: while.end:
457 ; NOLZCNT-NEXT: [[CNT_0_LCSSA:%.*]] = phi i32 [ [[SPLIT]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
458 ; NOLZCNT-NEXT: ret i32 [[CNT_0_LCSSA]]
461 %v = alloca i8, align 1
462 %tobool4 = icmp eq i32 %x, 0
463 br i1 %tobool4, label %while.end, label %while.body.lr.ph
465 while.body.lr.ph: ; preds = %entry
468 while.body: ; preds = %while.body.lr.ph, %while.body
469 %cnt.06 = phi i32 [ 0, %while.body.lr.ph ], [ %inc, %while.body ]
470 %x.addr.05 = phi i32 [ %x, %while.body.lr.ph ], [ %shr, %while.body ]
471 %shr = ashr i32 %x.addr.05, 1
472 %inc = add i32 %cnt.06, 1
473 store volatile i8 42, i8* %v, align 1
474 %tobool = icmp eq i32 %shr, 0
475 br i1 %tobool, label %while.cond.while.end_crit_edge, label %while.body
477 while.cond.while.end_crit_edge: ; preds = %while.body
478 %split = phi i32 [ %inc, %while.body ]
481 while.end: ; preds = %while.cond.while.end_crit_edge, %entry
482 %cnt.0.lcssa = phi i32 [ %split, %while.cond.while.end_crit_edge ], [ 0, %entry ]
486 ; We can't easily transform this loop. It returns 1 for an input of both
489 ; int ctlz_bad(unsigned n)
499 ; Function Attrs: norecurse nounwind readnone uwtable
500 define i32 @ctlz_bad(i32 %n) {
501 ; ALL-LABEL: @ctlz_bad(
503 ; ALL-NEXT: br label [[WHILE_COND:%.*]]
505 ; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
506 ; ALL-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
507 ; ALL-NEXT: [[SHR]] = lshr i32 [[N_ADDR_0]], 1
508 ; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0
509 ; ALL-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
510 ; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
512 ; ALL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_COND]] ]
513 ; ALL-NEXT: ret i32 [[INC_LCSSA]]
518 while.cond: ; preds = %while.cond, %entry
519 %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
520 %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
521 %shr = lshr i32 %n.addr.0, 1
522 %tobool = icmp eq i32 %shr, 0
523 %inc = add nsw i32 %i.0, 1
524 br i1 %tobool, label %while.end, label %while.cond
526 while.end: ; preds = %while.cond