1 ; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | \
3 ; RUN: opt -mtriple=thumbv8.1m.main -loop-unroll -unroll-remainder=false -S < %s | \
4 ; RUN: llc -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-UNROLL
5 ; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops \
6 ; RUN: -pass-remarks-analysis=hardware-loops %s -S -o - 2>&1 | \
7 ; RUN: FileCheck %s --check-prefix=CHECK-REMARKS
10 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
11 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
12 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
13 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
14 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
15 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
16 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
17 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
18 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
19 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; early_exit: a loop with two ways out. Besides the latch test
; (%inc < %n, unsigned) the body branches straight to %do.end when the loaded
; element is greater than %max. The directives below require that neither
; llvm.set.loop.iterations nor llvm.loop.decrement shows up after the label.
22 ; CHECK-LABEL: early_exit
23 ; CHECK-NOT: llvm.set.loop.iterations
24 ; CHECK-NOT: llvm.loop.decrement
25 define i32 @early_exit(i32* nocapture readonly %a, i32 %max, i32 %n) {
; %i.0 is the induction variable: 0 on entry, %inc when coming from %if.end.
30 %i.0 = phi i32 [ 0, %entry ], [ %inc, %if.end ]
31 %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.0
32 %0 = load i32, i32* %arrayidx, align 4
; Early exit: leave the loop as soon as a[i] > max (signed compare).
33 %cmp = icmp sgt i32 %0, %max
34 br i1 %cmp, label %do.end, label %if.end
; Latch: step the counter and keep looping while it is below %n.
37 %inc = add nuw i32 %i.0, 1
38 %cmp1 = icmp ult i32 %inc, %n
39 br i1 %cmp1, label %do.body, label %if.end.do.end_crit_edge
; Exit taken from the latch: load a[%inc] for the merge phi below.
41 if.end.do.end_crit_edge:
42 %arrayidx2.phi.trans.insert = getelementptr inbounds i32, i32* %a, i32 %inc
43 %.pre = load i32, i32* %arrayidx2.phi.trans.insert, align 4
; Merges the value from whichever exit was taken: %.pre from the latch exit,
; %0 from the early exit in %do.body.
47 %1 = phi i32 [ %.pre, %if.end.do.end_crit_edge ], [ %0, %do.body ]
; nested: a two-level loop nest in which both exit compares are against %N.
; The directives below expect:
;   - no llvm.start.loop.iterations call before the guard branch on %cmp20;
;   - a start call in %while.cond1.preheader.us feeding a counter phi for the
;     loop %while.body3.us, which is decremented with llvm.loop.decrement.reg
;     and compared against zero to form the back-edge branch;
;   - no i1-returning llvm.loop.decrement call after that.
52 ; CHECK-NOT: call i32 @llvm.start.loop.iterations.i32(i32 %N)
53 ; CHECK: br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us
55 ; CHECK: [[START:%[^ ]+]] = call i32 @llvm.start.loop.iterations.i32(i32 %N)
56 ; CHECK: br label %while.body3.us
58 ; CHECK: [[REM:%[^ ]+]] = phi i32 [ [[START]], %while.cond1.preheader.us ], [ [[LOOP_DEC:%[^ ]+]], %while.body3.us ]
59 ; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[REM]], i32 1)
60 ; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
61 ; CHECK: br i1 [[CMP]], label %while.body3.us, label %while.cond1.while.end_crit_edge.us
63 ; CHECK-NOT: %{{[^ ]+}} = call i1 @llvm.loop.decrement.i32(i32 1)
65 define void @nested(i32* nocapture %A, i32 %N) {
; Guard: skip the whole nest when %N is zero.
67 %cmp20 = icmp eq i32 %N, 0
68 br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us
; Outer loop header, which also acts as the preheader of the inner loop.
70 while.cond1.preheader.us:
71 %i.021.us = phi i32 [ %inc6.us, %while.cond1.while.end_crit_edge.us ], [ 0, %entry ]
; %mul.us = i * N, the row offset used by the inner loop.
72 %mul.us = mul i32 %i.021.us, %N
73 br label %while.body3.us
; Inner loop body: A[i*N + j] = i*N + j for j counting up from 0.
76 %j.019.us = phi i32 [ 0, %while.cond1.preheader.us ], [ %inc.us, %while.body3.us ]
77 %add.us = add i32 %j.019.us, %mul.us
78 %arrayidx.us = getelementptr inbounds i32, i32* %A, i32 %add.us
79 store i32 %add.us, i32* %arrayidx.us, align 4
80 %inc.us = add nuw i32 %j.019.us, 1
81 %exitcond = icmp eq i32 %inc.us, %N
82 br i1 %exitcond, label %while.cond1.while.end_crit_edge.us, label %while.body3.us
; Outer latch: advance i and leave the nest once it reaches %N.
84 while.cond1.while.end_crit_edge.us:
85 %inc6.us = add nuw i32 %i.021.us, 1
86 %exitcond23 = icmp eq i32 %inc6.us, %N
87 br i1 %exitcond23, label %while.end7, label %while.cond1.preheader.us
; pre_existing: the input IR already contains an llvm.start.loop.iterations
; call and an llvm.loop.decrement.reg call. The directives below accept one
; start call followed by the decrement on %0, and reject any further start or
; decrement.reg call around them.
93 ; CHECK-LABEL: pre_existing
94 ; CHECK: llvm.start.loop.iterations
95 ; CHECK-NOT: llvm.start.loop.iterations
96 ; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
97 ; CHECK-NOT: call i32 @llvm.loop.decrement.reg
98 define i32 @pre_existing(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
; Hand-written start intrinsic; its result seeds the counter phi %0 below.
100 %start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
103 while.body: ; preds = %while.body, %entry
; Copy loop: *p++ = *q++ once per iteration.
104 %q.addr.05 = phi i32* [ %incdec.ptr, %while.body ], [ %q, %entry ]
105 %p.addr.04 = phi i32* [ %incdec.ptr1, %while.body ], [ %p, %entry ]
; Remaining-iterations counter: %start on entry, the decremented value after.
106 %0 = phi i32 [ %start, %entry ], [ %2, %while.body ]
107 %incdec.ptr = getelementptr inbounds i32, i32* %q.addr.05, i32 1
108 %1 = load i32, i32* %q.addr.05, align 4
109 %incdec.ptr1 = getelementptr inbounds i32, i32* %p.addr.04, i32 1
110 store i32 %1, i32* %p.addr.04, align 4
; Hand-written decrement; the loop continues while the result is non-zero.
111 %2 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
112 %3 = icmp ne i32 %2, 0
113 br i1 %3, label %while.body, label %while.end
115 while.end: ; preds = %while.body
; pre_existing_test_set: same copy loop as pre_existing, but the input uses
; the guarded llvm.test.start.loop.iterations form, which returns { i32, i1 }.
; The directives below accept that call and the decrement on %0, and reject
; any llvm.set*.loop.iterations call or further decrement.reg call.
119 ; CHECK-LABEL: pre_existing_test_set
120 ; CHECK: call { i32, i1 } @llvm.test.start.loop.iterations
121 ; CHECK-NOT: llvm.set{{.*}}.loop.iterations
122 ; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
123 ; CHECK-NOT: call i32 @llvm.loop.decrement.reg
124 define i32 @pre_existing_test_set(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
126 %guard = call { i32, i1 } @llvm.test.start.loop.iterations.i32(i32 %n)
; Element 0 seeds the counter phi %0 in the loop.
127 %g0 = extractvalue { i32, i1 } %guard, 0
; Element 1 is the i1 that decides whether the loop is entered at all.
128 %g1 = extractvalue { i32, i1 } %guard, 1
129 br i1 %g1, label %while.preheader, label %while.end
134 while.body: ; preds = %while.body, %while.preheader
; Copy loop: *p++ = *q++ once per iteration.
135 %q.addr.05 = phi i32* [ %incdec.ptr, %while.body ], [ %q, %while.preheader ]
136 %p.addr.04 = phi i32* [ %incdec.ptr1, %while.body ], [ %p, %while.preheader ]
; Remaining-iterations counter: %g0 on entry, the decremented value after.
137 %0 = phi i32 [ %g0, %while.preheader ], [ %2, %while.body ]
138 %incdec.ptr = getelementptr inbounds i32, i32* %q.addr.05, i32 1
139 %1 = load i32, i32* %q.addr.05, align 4
140 %incdec.ptr1 = getelementptr inbounds i32, i32* %p.addr.04, i32 1
141 store i32 %1, i32* %p.addr.04, align 4
; Hand-written decrement; the loop continues while the result is non-zero.
142 %2 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
143 %3 = icmp ne i32 %2, 0
144 br i1 %3, label %while.body, label %while.end
; NOTE(review): %while.end is also a target of the guard branch on %g1 above,
; so the predecessor list in the trailing comment looks incomplete.
146 while.end: ; preds = %while.body
; pre_existing_inner: the loop nest from @nested, except that the inner loop
; already carries hand-written start/decrement intrinsics in the input. The
; directives below require no start call before %while.cond1.preheader.us,
; then the existing start call and the decrement on %0, and no further
; llvm.loop.decrement call of any flavour afterwards.
150 ; CHECK-LABEL: pre_existing_inner
151 ; CHECK-NOT: llvm.start.loop.iterations
152 ; CHECK: while.cond1.preheader.us:
153 ; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
154 ; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
156 ; CHECK-NOT: call i32 @llvm.loop.decrement
157 define void @pre_existing_inner(i32* nocapture %A, i32 %N) {
; Guard: skip the whole nest when %N is zero.
159 %cmp20 = icmp eq i32 %N, 0
160 br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us
; Outer loop header, which also acts as the preheader of the inner loop.
162 while.cond1.preheader.us:
163 %i.021.us = phi i32 [ %inc6.us, %while.cond1.while.end_crit_edge.us ], [ 0, %entry ]
164 %mul.us = mul i32 %i.021.us, %N
; Hand-written start intrinsic, executed once per outer iteration.
165 %start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
166 br label %while.body3.us
; Inner loop body: A[i*N + j] = i*N + j. %j.019.us only feeds the address
; computation; the back-edge is controlled by the counter %0.
169 %j.019.us = phi i32 [ 0, %while.cond1.preheader.us ], [ %inc.us, %while.body3.us ]
170 %0 = phi i32 [ %start, %while.cond1.preheader.us ], [ %1, %while.body3.us ]
171 %add.us = add i32 %j.019.us, %mul.us
172 %arrayidx.us = getelementptr inbounds i32, i32* %A, i32 %add.us
173 store i32 %add.us, i32* %arrayidx.us, align 4
174 %inc.us = add nuw i32 %j.019.us, 1
; Hand-written decrement; the inner loop continues while it is non-zero.
175 %1 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
176 %2 = icmp ne i32 %1, 0
177 br i1 %2, label %while.body3.us, label %while.cond1.while.end_crit_edge.us
; Outer latch: an ordinary compare against %N, no intrinsics in the input.
179 while.cond1.while.end_crit_edge.us:
180 %inc6.us = add nuw i32 %i.021.us, 1
181 %exitcond23 = icmp eq i32 %inc6.us, %N
182 br i1 %exitcond23, label %while.end7, label %while.cond1.preheader.us
; not_rotated: function written with unnamed values. Both visible counter phis
; (%5 and %10) are compared against the first argument (%0) and the exit
; branch is taken before the body runs, i.e. the exit test sits at the top of
; each loop.
; NOTE(review): the second negative pattern below spells an i32-returning
; "llvm.loop.decrement.i32"; elsewhere in this file the intrinsic appears as
; "call i32 @llvm.loop.decrement.reg.i32" or "call i1 @llvm.loop.decrement.i32".
; Confirm the pattern really matches the call it is meant to forbid.
188 ; CHECK-LABEL: not_rotated
189 ; CHECK-NOT: call i32 @llvm.start.loop.iterations
190 ; CHECK-NOT: call i32 @llvm.loop.decrement.i32
191 define void @not_rotated(i32, i16* nocapture, i16 signext) {
; First counter: starts at 0; leave for %20 once it equals argument %0.
195 %5 = phi i32 [ 0, %3 ], [ %19, %18 ]
196 %6 = icmp eq i32 %5, %0
197 br i1 %6, label %20, label %7
; Second counter: starts at 0 when entered from %7; leave for %18 once it
; equals argument %0, otherwise run the body in %12.
204 %10 = phi i32 [ %17, %12 ], [ 0, %7 ]
205 %11 = icmp eq i32 %10, %0
206 br i1 %11, label %18, label %12
; Body: add the i16 argument (%2) to the element at index %10 + %8 of the
; array argument (%1), in place.
209 %13 = add i32 %10, %8
210 %14 = getelementptr inbounds i16, i16* %1, i32 %13
211 %15 = load i16, i16* %14, align 2
212 %16 = add i16 %15, %2
213 store i16 %16, i16* %14, align 2
; multi_latch: the induction phi %iv receives %count.next from two different
; blocks (%latch.0 and %latch.1), and two conditional branches below target
; %header. The directives require that no start.loop.iterations call and no
; llvm.loop.decrement call appears after the label.
225 ; CHECK-LABEL: multi_latch
226 ; CHECK-NOT: call i32 @llvm.start.loop.iterations
227 ; CHECK-NOT: call i32 @llvm.loop.decrement
228 define void @multi_latch(i32* %a, i32* %b, i32 %N) {
; %half = N / 2 (logical shift right by one).
230 %half = lshr i32 %N, 1
234 %iv = phi i32 [ 0, %entry ], [ %count.next, %latch.0 ], [ %count.next, %latch.1 ]
; Pick a side depending on whether %iv is still below %half.
235 %cmp = icmp ult i32 %iv, %half
236 %addr.a = getelementptr i32, i32* %a, i32 %iv
237 %addr.b = getelementptr i32, i32* %b, i32 %iv
238 br i1 %cmp, label %if.then, label %if.else
241 store i32 %iv, i32* %addr.a
245 store i32 %iv, i32* %addr.b
; Step the counter; this branch goes back to %header while it is below %half.
249 %count.next = add nuw i32 %iv, 1
250 %cmp.1 = icmp ult i32 %count.next, %half
251 br i1 %cmp.1, label %header, label %latch.1
; Copy a[iv] to b[iv]; this branch goes back to %header while the counter is
; below %N.
254 %ld = load i32, i32* %addr.a
255 store i32 %ld, i32* %addr.b
256 %cmp.2 = icmp ult i32 %count.next, %N
257 br i1 %cmp.2, label %header, label %latch.1
; search: single loop over %N bytes of %c whose body contains a switch on the
; loaded byte. The directives below expect the guard to be expressed through
; llvm.test.start.loop.iterations (element 1 of its result drives the branch
; into %for.body.preheader or around the loop), and the back-edge to be an
; llvm.loop.decrement.reg call compared against zero.
263 ; CHECK-LABEL: search
265 ; CHECK: [[TEST1:%[^ ]+]] = call { i32, i1 } @llvm.test.start.loop.iterations.i32(i32 %N)
266 ; CHECK: [[TEST:%[^ ]+]] = extractvalue { i32, i1 } [[TEST1]], 1
267 ; CHECK: br i1 [[TEST]], label %for.body.preheader, label %for.cond.cleanup
268 ; CHECK: for.body.preheader:
269 ; CHECK: br label %for.body
272 ; CHECK: [[LOOP_DEC:%[^ ]+]] = call i32 @llvm.loop.decrement.reg.i32(
273 ; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
274 ; CHECK: br i1 [[CMP]], label %for.body, label %for.cond.cleanup
275 define i32 @search(i8* nocapture readonly %c, i32 %N) {
; Guard in the input: skip the loop when %N is zero.
277 %cmp11 = icmp eq i32 %N, 0
278 br i1 %cmp11, label %for.cond.cleanup, label %for.body
; Exit block values: both counters are 0 when the loop was skipped, otherwise
; the final values from %for.inc; their difference is %sub.
281 %found.0.lcssa = phi i32 [ 0, %entry ], [ %found.1, %for.inc ]
282 %spaces.0.lcssa = phi i32 [ 0, %entry ], [ %spaces.1, %for.inc ]
283 %sub = sub nsw i32 %found.0.lcssa, %spaces.0.lcssa
; Loop-carried state: index plus the two running counters.
287 %i.014 = phi i32 [ %inc3, %for.inc ], [ 0, %entry ]
288 %spaces.013 = phi i32 [ %spaces.1, %for.inc ], [ 0, %entry ]
289 %found.012 = phi i32 [ %found.1, %for.inc ], [ 0, %entry ]
290 %arrayidx = getelementptr inbounds i8, i8* %c, i32 %i.014
291 %0 = load i8, i8* %arrayidx, align 1
; Dispatch on the byte; the default destination is %for.inc.
292 switch i8 %0, label %for.inc [
299 sw.bb: ; preds = %for.body, %for.body, %for.body
300 %inc = add nsw i32 %found.012, 1
303 sw.bb1: ; preds = %for.body
304 %inc2 = add nsw i32 %spaces.013, 1
307 for.inc: ; preds = %sw.bb, %sw.bb1, %for.body
; Merge the counters: only the path through %sw.bb bumps "found", only the
; path through %sw.bb1 bumps "spaces"; the default path changes neither.
308 %found.1 = phi i32 [ %found.012, %for.body ], [ %found.012, %sw.bb1 ], [ %inc, %sw.bb ]
309 %spaces.1 = phi i32 [ %spaces.013, %for.body ], [ %inc2, %sw.bb1 ], [ %spaces.013, %sw.bb ]
310 %inc3 = add nuw i32 %i.014, 1
311 %exitcond = icmp eq i32 %inc3, %N
312 br i1 %exitcond, label %for.cond.cleanup, label %for.body
; unroll_inc_int: a[i] = b[i] * c[i] for i counting up to %N, guarded by a
; signed "N > 0" test. Two sets of expectations follow:
;   - default prefix (IR output): a start.loop.iterations call on %N and a
;     loop.decrement.reg call;
;   - UNROLL prefix (assembly output): no dls after the preheader label, the
;     %for.body block closed by bne instead of le, then a wls/le pair around
;     a second block whose exit label directly follows the le.
315 ; CHECK-LABEL: unroll_inc_int
316 ; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
317 ; CHECK: call i32 @llvm.loop.decrement.reg.i32(
319 ; TODO: We should be able to support the unrolled loop body.
320 ; CHECK-UNROLL-LABEL: unroll_inc_int
321 ; CHECK-UNROLL: [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
322 ; CHECK-UNROLL-NOT: dls
323 ; CHECK-UNROLL: [[LOOP:.LBB[0-9_]+]]: @ %for.body
324 ; CHECK-UNROLL-NOT: le lr, [[LOOP]]
325 ; CHECK-UNROLL: bne [[LOOP]]
326 ; CHECK-UNROLL: wls lr, r12, [[EXIT:.LBB[0-9_]+]]
327 ; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]:
328 ; CHECK-UNROLL: le lr, [[EPIL]]
329 ; CHECK-UNROLL-NEXT: [[EXIT]]
331 define void @unroll_inc_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
; Guard: enter the loop only when %N is positive (signed compare).
333 %cmp8 = icmp sgt i32 %N, 0
334 br i1 %cmp8, label %for.body, label %for.cond.cleanup
; Loop body: load b[i] and c[i], multiply, store to a[i].
340 %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
341 %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
342 %0 = load i32, i32* %arrayidx, align 4
343 %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
344 %1 = load i32, i32* %arrayidx1, align 4
345 %mul = mul nsw i32 %1, %0
346 %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
347 store i32 %mul, i32* %arrayidx2, align 4
; Latch: count up by one and exit when the counter equals %N.
348 %inc = add nuw nsw i32 %i.09, 1
349 %exitcond = icmp eq i32 %inc, %N
350 br i1 %exitcond, label %for.cond.cleanup, label %for.body
; unroll_inc_unsigned: same a[i] = b[i] * c[i] loop as unroll_inc_int, but the
; guard is "N == 0" and the increment carries only nuw. Expectations:
;   - default prefix (IR output): the guarded test.start.loop.iterations form
;     plus a loop.decrement.reg call;
;   - UNROLL prefix (assembly output): no dls after the preheader label, the
;     %for.body block closed by bne instead of le, then a wls/le pair around
;     a second block, with the wls target label appearing after the le.
353 ; CHECK-LABEL: unroll_inc_unsigned
354 ; CHECK: call { i32, i1 } @llvm.test.start.loop.iterations.i32(i32 %N)
355 ; CHECK: call i32 @llvm.loop.decrement.reg.i32(
357 ; TODO: We should be able to support the unrolled loop body.
358 ; CHECK-UNROLL-LABEL: unroll_inc_unsigned
359 ; CHECK-UNROLL: [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
360 ; CHECK-UNROLL-NOT: dls
361 ; CHECK-UNROLL: [[LOOP:.LBB[0-9_]+]]: @ %for.body
362 ; CHECK-UNROLL-NOT: le lr, [[LOOP]]
363 ; CHECK-UNROLL: bne [[LOOP]]
364 ; CHECK-UNROLL: wls lr, r12, [[EPIL_EXIT:.LBB[0-9_]+]]
365 ; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]:
366 ; CHECK-UNROLL: le lr, [[EPIL]]
367 ; CHECK-UNROLL: [[EPIL_EXIT]]:
369 define void @unroll_inc_unsigned(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
; Guard: skip the loop when %N is zero.
371 %cmp8 = icmp eq i32 %N, 0
372 br i1 %cmp8, label %for.cond.cleanup, label %for.body
; Loop body: load b[i] and c[i], multiply, store to a[i].
378 %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
379 %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
380 %0 = load i32, i32* %arrayidx, align 4
381 %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
382 %1 = load i32, i32* %arrayidx1, align 4
383 %mul = mul nsw i32 %1, %0
384 %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
385 store i32 %mul, i32* %arrayidx2, align 4
; Latch: count up by one and exit when the counter equals %N.
386 %inc = add nuw i32 %i.09, 1
387 %exitcond = icmp eq i32 %inc, %N
388 br i1 %exitcond, label %for.cond.cleanup, label %for.body
; unroll_dec_int: a[i] = b[i] * c[i] with the index counting DOWN from %N; the
; loop continues while the decremented index is still greater than zero.
; Expectations:
;   - default prefix (IR output): a start.loop.iterations call on %N and a
;     loop.decrement.reg call;
;   - UNROLL prefix (assembly output): a wls/le pair around a first block,
;     then a second block closed by le, with no "b" text after it.
; NOTE(review): the directive after the first le re-captures PROLOGUE_EXIT
; with a fresh regex instead of reusing the value bound on the wls line, so it
; does not check that the label is the wls target - confirm this is intended.
391 ; CHECK-LABEL: unroll_dec_int
392 ; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
393 ; CHECK: call i32 @llvm.loop.decrement.reg.i32(
395 ; CHECK-UNROLL-LABEL: unroll_dec_int:
396 ; CHECK-UNROLL: wls lr, {{.*}}, [[PROLOGUE_EXIT:.LBB[0-9_]+]]
397 ; CHECK-UNROLL-NEXT: [[PROLOGUE:.LBB[0-9_]+]]:
398 ; CHECK-UNROLL: le lr, [[PROLOGUE]]
399 ; CHECK-UNROLL-NEXT: [[PROLOGUE_EXIT:.LBB[0-9_]+]]:
400 ; CHECK-UNROLL: [[BODY:.LBB[0-9_]+]]:
401 ; CHECK-UNROLL: le lr, [[BODY]]
402 ; CHECK-UNROLL-NOT: b
404 define void @unroll_dec_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
; Guard: enter the loop only when %N is positive (signed compare).
406 %cmp8 = icmp sgt i32 %N, 0
407 br i1 %cmp8, label %for.body, label %for.cond.cleanup
; The index starts at %N itself and is used directly as the array subscript.
413 %i.09 = phi i32 [ %dec, %for.body ], [ %N, %entry ]
414 %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
415 %0 = load i32, i32* %arrayidx, align 4
416 %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
417 %1 = load i32, i32* %arrayidx1, align 4
418 %mul = mul nsw i32 %1, %0
419 %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
420 store i32 %mul, i32* %arrayidx2, align 4
; Latch: step the index down by one and loop while it is still positive.
421 %dec = add nsw i32 %i.09, -1
422 %cmp = icmp sgt i32 %dec, 0
423 br i1 %cmp, label %for.body, label %for.cond.cleanup
; Declarations for the hardware-loop intrinsics that the pre_existing*
; functions above call explicitly in their input IR. The definition of
; attribute group #0 is not visible in this part of the file.
426 declare i32 @llvm.start.loop.iterations.i32(i32) #0
427 declare { i32, i1 } @llvm.test.start.loop.iterations.i32(i32) #0
428 declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #0