; RUN: opt -S -loop-reroll %s | FileCheck %s
target triple = "aarch64--linux-gnu"
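
; @rerollable1: the loop body is two copies of the same load/store pair
; (array rows %iv+20 and %iv+10, elements 0 and 1). The extra adds feed
; only root instructions, so the loop should be rerolled; the CHECK lines
; verify the single rerolled copy indexed directly by %iv.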
define void @rerollable1([2 x i32]* nocapture %a) {
; CHECK-LABEL: @rerollable1(
; CHECK:      loop:
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 20, i64 %iv
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 10, i64 %iv
; CHECK-NEXT: [[VALUE:%.*]] = load i32, i32* [[SCEVGEP1]], align 4
; CHECK-NEXT: store i32 [[VALUE]], i32* [[SCEVGEP2]], align 4
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]

  ; NO unrerollable instructions

  ; extra simple arithmetic operations, used by root instructions
  %plus20 = add nuw nsw i64 %iv, 20
  %plus10 = add nuw nsw i64 %iv, 10

  %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
  %value0 = load i32, i32* %ldptr0, align 4
  %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
  store i32 %value0, i32* %stptr0, align 4

  %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
  %value1 = load i32, i32* %ldptr1, align 4
  %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
  store i32 %value1, i32* %stptr1, align 4

  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 5
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
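
; @unrerollable1: same rerollable load/store pattern as above, but the
; extra store of 999 indexes the array with %iv directly and is not part
; of the repeated root pattern, so the loop is expected to stay unrolled;
; the CHECK lines verify the original body is preserved.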
define void @unrerollable1([2 x i32]* nocapture %a) {
; CHECK-LABEL: @unrerollable1(
; CHECK:      loop:
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0
; CHECK-NEXT: store i32 999, i32* %stptrx, align 4
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]

  ; unrerollable instructions using %iv
  %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0
  store i32 999, i32* %stptrx, align 4

  ; extra simple arithmetic operations, used by root instructions
  %plus20 = add nuw nsw i64 %iv, 20
  %plus10 = add nuw nsw i64 %iv, 10

  %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
  %value0 = load i32, i32* %ldptr0, align 4
  %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
  store i32 %value0, i32* %stptr0, align 4

  %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
  %value1 = load i32, i32* %ldptr1, align 4
  %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
  store i32 %value1, i32* %stptr1, align 4

  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 5
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
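
; @unrerollable2: as above, but the extra store of 999 indexes the array
; with %iv.next rather than %iv; it is still outside the root pattern, so
; rerolling is expected to be blocked and the body kept as-is.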
define void @unrerollable2([2 x i32]* nocapture %a) {
; CHECK-LABEL: @unrerollable2(
; CHECK:      loop:
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0
; CHECK-NEXT: store i32 999, i32* %stptrx, align 4
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]

  %iv.next = add nuw nsw i64 %iv, 1

  ; unrerollable instructions using %iv.next
  %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0
  store i32 999, i32* %stptrx, align 4

  ; extra simple arithmetic operations, used by root instructions
  %plus20 = add nuw nsw i64 %iv, 20
  %plus10 = add nuw nsw i64 %iv, 10

  %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
  %value0 = load i32, i32* %ldptr0, align 4
  %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
  store i32 %value0, i32* %stptr0, align 4

  %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
  %value1 = load i32, i32* %ldptr1, align 4
  %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
  store i32 %value1, i32* %stptr1, align 4

  %exitcond = icmp eq i64 %iv.next, 5
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
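
; @rerollable2: %iv.scaled = 3 * %iv + 20, and @bar is called with
; (%iv.scaled + k) / 5 for k in {0, 1, 2} and {4, 5, 6}, i.e. two root
; sets of three. All extra arithmetic feeds root instructions, so the
; loop should be rerolled; the CHECK lines verify the rerolled body adds
; 20 and 24 to the new induction variable.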
define dso_local void @rerollable2() {
; CHECK-LABEL: @rerollable2(
; CHECK:      loop:
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}}
; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}}
entry:
  br label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]

  %iv.mul3 = mul nuw nsw i32 %iv, 3

  ; extra simple arithmetic operations, used by root instructions
  %iv.scaled = add nuw nsw i32 %iv.mul3, 20

  ; NO unrerollable instructions

  %iv.scaled.div5 = udiv i32 %iv.scaled, 5
  tail call void @bar(i32 %iv.scaled.div5)

  %iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1
  %iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5
  tail call void @bar(i32 %iv.scaled.add1.div5)

  %iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2
  %iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5
  tail call void @bar(i32 %iv.scaled.add2.div5)

  %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4
  %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5
  tail call void @bar(i32 %iv.scaled.add4.div5)

  %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5
  %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5
  tail call void @bar(i32 %iv.scaled.add5.div5)

  %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6
  %iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5
  tail call void @bar(i32 %iv.scaled.add6.div5)

  %iv.next = add nuw nsw i32 %iv, 1
  %cmp = icmp ult i32 %iv.next, 3
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}
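
; @unrerollable3: same rerollable pattern of calls to @bar as above, but
; the extra call to @bar with %iv * 7 uses %iv outside the root pattern,
; so the loop is expected to stay unrolled; the CHECK lines verify the
; original body is preserved.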
define dso_local void @unrerollable3() {
; CHECK-LABEL: @unrerollable3(
; CHECK:      loop:
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: %iv.mul3 = mul nuw nsw i32 %iv, 3
; CHECK-NEXT: %iv.scaled = add nuw nsw i32 %iv.mul3, 20
; CHECK-NEXT: %iv.mul7 = mul nuw nsw i32 %iv, 7
; CHECK-NEXT: tail call void @bar(i32 %iv.mul7)
entry:
  br label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]

  %iv.mul3 = mul nuw nsw i32 %iv, 3

  ; extra simple arithmetic operations, used by root instructions
  %iv.scaled = add nuw nsw i32 %iv.mul3, 20

  ; unrerollable instructions using %iv
  %iv.mul7 = mul nuw nsw i32 %iv, 7
  tail call void @bar(i32 %iv.mul7)

  %iv.scaled.div5 = udiv i32 %iv.scaled, 5
  tail call void @bar(i32 %iv.scaled.div5)

  %iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1
  %iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5
  tail call void @bar(i32 %iv.scaled.add1.div5)

  %iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2
  %iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5
  tail call void @bar(i32 %iv.scaled.add2.div5)

  %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4
  %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5
  tail call void @bar(i32 %iv.scaled.add4.div5)

  %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5
  %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5
  tail call void @bar(i32 %iv.scaled.add5.div5)

  %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6
  %iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5
  tail call void @bar(i32 %iv.scaled.add6.div5)

  %iv.next = add nuw nsw i32 %iv, 1
  %cmp = icmp ult i32 %iv.next, 3
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}

declare dso_local void @bar(i32)