1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -S -passes=loop-fusion < %s | FileCheck %s
; 1024-element i32 array; the second loop of @dep_free_parametric stores into it.
4 @B = common global [1024 x i32] zeroinitializer, align 16
; Two guarded loops: each guard tests `0 < %N`, the first loop stores to %A and
; the second loop stores to @B. The assertions below expect both loop bodies to
; end up in one loop (bb5) with a single back-edge branch.
6 define void @dep_free_parametric(ptr noalias %A, i64 %N) {
7 ; CHECK-LABEL: @dep_free_parametric(
9 ; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i64 0, [[N:%.*]]
10 ; CHECK-NEXT: [[CMP31:%.*]] = icmp slt i64 0, [[N]]
11 ; CHECK-NEXT: br i1 [[CMP4]], label [[BB3:%.*]], label [[BB12:%.*]]
13 ; CHECK-NEXT: br label [[BB5:%.*]]
15 ; CHECK-NEXT: [[I_05:%.*]] = phi i64 [ [[INC:%.*]], [[BB5]] ], [ 0, [[BB3]] ]
16 ; CHECK-NEXT: [[I1_02:%.*]] = phi i64 [ [[INC14:%.*]], [[BB5]] ], [ 0, [[BB3]] ]
17 ; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[I_05]], 3
18 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[I_05]], 3
19 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[SUB]], [[ADD]]
20 ; CHECK-NEXT: [[REM:%.*]] = srem i64 [[MUL]], [[I_05]]
21 ; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[REM]] to i32
22 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I_05]]
23 ; CHECK-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4
24 ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_05]], 1
25 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]]
26 ; CHECK-NEXT: [[SUB7:%.*]] = sub nsw i64 [[I1_02]], 3
27 ; CHECK-NEXT: [[ADD8:%.*]] = add nsw i64 [[I1_02]], 3
28 ; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i64 [[SUB7]], [[ADD8]]
29 ; CHECK-NEXT: [[REM10:%.*]] = srem i64 [[MUL9]], [[I1_02]]
30 ; CHECK-NEXT: [[CONV11:%.*]] = trunc i64 [[REM10]] to i32
31 ; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 [[I1_02]]
32 ; CHECK-NEXT: store i32 [[CONV11]], ptr [[ARRAYIDX12]], align 4
33 ; CHECK-NEXT: [[INC14]] = add nsw i64 [[I1_02]], 1
34 ; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i64 [[INC14]], [[N]]
35 ; CHECK-NEXT: br i1 [[CMP3]], label [[BB5]], label [[BB15:%.*]]
37 ; CHECK-NEXT: br label [[BB12]]
39 ; CHECK-NEXT: ret void
; First guard: enter the first loop only when 0 < %N; otherwise go straight to
; the second guard in bb14.
42 %cmp4 = icmp slt i64 0, %N
43 br i1 %cmp4, label %bb3, label %bb14
; First loop body: A[i] = trunc(((i - 3) * (i + 3)) srem i), for i counting up
; from 0 while i + 1 < %N.
48 bb5: ; preds = %bb3, %bb5
49 %i.05 = phi i64 [ %inc, %bb5 ], [ 0, %bb3 ]
50 %sub = sub nsw i64 %i.05, 3
51 %add = add nsw i64 %i.05, 3
52 %mul = mul nsw i64 %sub, %add
53 %rem = srem i64 %mul, %i.05
54 %conv = trunc i64 %rem to i32
55 %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.05
56 store i32 %conv, ptr %arrayidx, align 4
57 %inc = add nsw i64 %i.05, 1
58 %cmp = icmp slt i64 %inc, %N
59 br i1 %cmp, label %bb5, label %bb10
; Second guard: same predicate as %cmp4, recomputed as %cmp31.
64 bb14: ; preds = %bb10, %entry
65 %cmp31 = icmp slt i64 0, %N
66 br i1 %cmp31, label %bb8, label %bb12
; Second loop body: the same computation as the first loop, stored to @B.
71 bb9: ; preds = %bb8, %bb9
72 %i1.02 = phi i64 [ %inc14, %bb9 ], [ 0, %bb8 ]
73 %sub7 = sub nsw i64 %i1.02, 3
74 %add8 = add nsw i64 %i1.02, 3
75 %mul9 = mul nsw i64 %sub7, %add8
76 %rem10 = srem i64 %mul9, %i1.02
77 %conv11 = trunc i64 %rem10 to i32
78 %arrayidx12 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %i1.02
79 store i32 %conv11, ptr %arrayidx12, align 4
80 %inc14 = add nsw i64 %i1.02, 1
81 %cmp3 = icmp slt i64 %inc14, %N
82 br i1 %cmp3, label %bb9, label %bb15
87 bb12: ; preds = %bb15, %bb14
91 ; Test that `%add` is moved into for.first.preheader, and that the two loops
92 ; for.first and for.second are fused.
; Both loop guards branch on the same %cmp.guard; %add starts out in
; for.second.preheader, i.e. between the two loops.
94 define void @moveinsts_preheader(ptr noalias %A, ptr noalias %B, i64 %N, i32 %x) {
95 ; CHECK-LABEL: @moveinsts_preheader(
96 ; CHECK-NEXT: for.first.guard:
97 ; CHECK-NEXT: [[CMP_GUARD:%.*]] = icmp slt i64 0, [[N:%.*]]
98 ; CHECK-NEXT: br i1 [[CMP_GUARD]], label [[FOR_FIRST_PREHEADER:%.*]], label [[FOR_END:%.*]]
99 ; CHECK: for.first.preheader:
100 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X:%.*]], 1
101 ; CHECK-NEXT: br label [[FOR_FIRST:%.*]]
103 ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[INC_I:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
104 ; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[INC_J:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
105 ; CHECK-NEXT: [[AI:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I]]
106 ; CHECK-NEXT: store i32 0, ptr [[AI]], align 4
107 ; CHECK-NEXT: [[INC_I]] = add nsw i64 [[I]], 1
108 ; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i64 [[INC_I]], [[N]]
109 ; CHECK-NEXT: [[BJ:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[J]]
110 ; CHECK-NEXT: store i32 0, ptr [[BJ]], align 4
111 ; CHECK-NEXT: [[INC_J]] = add nsw i64 [[J]], 1
112 ; CHECK-NEXT: [[CMP_J:%.*]] = icmp slt i64 [[INC_J]], [[N]]
113 ; CHECK-NEXT: br i1 [[CMP_J]], label [[FOR_FIRST]], label [[FOR_SECOND_EXIT:%.*]]
114 ; CHECK: for.second.exit:
115 ; CHECK-NEXT: br label [[FOR_END]]
117 ; CHECK-NEXT: ret void
; First guard: skip the first loop when 0 < %N does not hold.
120 %cmp.guard = icmp slt i64 0, %N
121 br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
; First loop: zero A[i] for i counting up from 0 while i + 1 < %N.
127 %i = phi i64 [ %inc.i, %for.first ], [ 0, %for.first.preheader ]
128 %Ai = getelementptr inbounds i32, ptr %A, i64 %i
129 store i32 0, ptr %Ai, align 4
130 %inc.i = add nsw i64 %i, 1
131 %cmp.i = icmp slt i64 %inc.i, %N
132 br i1 %cmp.i, label %for.first, label %for.first.exit
135 br label %for.second.guard
; Second guard: reuses %cmp.guard rather than recomputing the comparison.
138 br i1 %cmp.guard, label %for.second.preheader, label %for.end
140 for.second.preheader:
; Not referenced by any instruction shown in this function; the assertions
; above expect it in for.first.preheader after fusion.
141 %add = add nsw i32 %x, 1
; Second loop: zero B[j] for j counting up from 0 while j + 1 < %N.
145 %j = phi i64 [ %inc.j, %for.second ], [ 0, %for.second.preheader ]
146 %Bj = getelementptr inbounds i32, ptr %B, i64 %j
147 store i32 0, ptr %Bj, align 4
148 %inc.j = add nsw i64 %j, 1
149 %cmp.j = icmp slt i64 %inc.j, %N
150 br i1 %cmp.j, label %for.second, label %for.second.exit
159 ; Test that `%add` is moved into for.second.exit, and that the two loops
160 ; for.first and for.second are fused.
; Both loop guards branch on the same %cmp.guard; %add starts out on the exit
; path of the first loop, just before the jump to for.second.guard.
162 define void @moveinsts_exitblock(ptr noalias %A, ptr noalias %B, i64 %N, i32 %x) {
163 ; CHECK-LABEL: @moveinsts_exitblock(
164 ; CHECK-NEXT: for.first.guard:
165 ; CHECK-NEXT: [[CMP_GUARD:%.*]] = icmp slt i64 0, [[N:%.*]]
166 ; CHECK-NEXT: br i1 [[CMP_GUARD]], label [[FOR_FIRST_PREHEADER:%.*]], label [[FOR_END:%.*]]
167 ; CHECK: for.first.preheader:
168 ; CHECK-NEXT: br label [[FOR_FIRST:%.*]]
170 ; CHECK-NEXT: [[I_04:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
171 ; CHECK-NEXT: [[J_02:%.*]] = phi i64 [ [[INC6:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
172 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I_04]]
173 ; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
174 ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_04]], 1
175 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]]
176 ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[J_02]]
177 ; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX4]], align 4
178 ; CHECK-NEXT: [[INC6]] = add nsw i64 [[J_02]], 1
179 ; CHECK-NEXT: [[CMP_J:%.*]] = icmp slt i64 [[INC6]], [[N]]
180 ; CHECK-NEXT: br i1 [[CMP_J]], label [[FOR_FIRST]], label [[FOR_SECOND_EXIT:%.*]]
181 ; CHECK: for.second.exit:
182 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X:%.*]], 1
183 ; CHECK-NEXT: br label [[FOR_END]]
185 ; CHECK-NEXT: ret void
; First guard: skip the first loop when 0 < %N does not hold.
188 %cmp.guard = icmp slt i64 0, %N
189 br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
; First loop: zero A[i] for i counting up from 0 while i + 1 < %N.
195 %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
196 %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.04
197 store i32 0, ptr %arrayidx, align 4
198 %inc = add nsw i64 %i.04, 1
199 %cmp = icmp slt i64 %inc, %N
200 br i1 %cmp, label %for.first, label %for.first.exit
; Not referenced by any instruction shown in this function; the assertions
; above expect it in for.second.exit after fusion.
203 %add = add nsw i32 %x, 1
204 br label %for.second.guard
; Second guard: reuses %cmp.guard rather than recomputing the comparison.
207 br i1 %cmp.guard, label %for.second.preheader, label %for.end
209 for.second.preheader:
; Second loop: zero B[j] for j counting up from 0 while j + 1 < %N.
213 %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
214 %arrayidx4 = getelementptr inbounds i32, ptr %B, i64 %j.02
215 store i32 0, ptr %arrayidx4, align 4
216 %inc6 = add nsw i64 %j.02, 1
217 %cmp.j = icmp slt i64 %inc6, %N
218 br i1 %cmp.j, label %for.second, label %for.second.exit
227 ; Test that `%add` is moved into for.first.guard, and that the two loops
228 ; for.first and for.second are fused.
; Both loop guards branch on the same %cmp.guard; %add starts out immediately
; before the second guard's conditional branch.
230 define void @moveinsts_guardblock(ptr noalias %A, ptr noalias %B, i64 %N, i32 %x) {
231 ; CHECK-LABEL: @moveinsts_guardblock(
232 ; CHECK-NEXT: for.first.guard:
233 ; CHECK-NEXT: [[CMP_GUARD:%.*]] = icmp slt i64 0, [[N:%.*]]
234 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X:%.*]], 1
235 ; CHECK-NEXT: br i1 [[CMP_GUARD]], label [[FOR_FIRST_PREHEADER:%.*]], label [[FOR_END:%.*]]
236 ; CHECK: for.first.preheader:
237 ; CHECK-NEXT: br label [[FOR_FIRST:%.*]]
239 ; CHECK-NEXT: [[I_04:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
240 ; CHECK-NEXT: [[J_02:%.*]] = phi i64 [ [[INC6:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
241 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I_04]]
242 ; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
243 ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_04]], 1
244 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]]
245 ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[J_02]]
246 ; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX4]], align 4
247 ; CHECK-NEXT: [[INC6]] = add nsw i64 [[J_02]], 1
248 ; CHECK-NEXT: [[CMP_J:%.*]] = icmp slt i64 [[INC6]], [[N]]
249 ; CHECK-NEXT: br i1 [[CMP_J]], label [[FOR_FIRST]], label [[FOR_SECOND_EXIT:%.*]]
250 ; CHECK: for.second.exit:
251 ; CHECK-NEXT: br label [[FOR_END]]
253 ; CHECK-NEXT: ret void
; First guard: skip the first loop when 0 < %N does not hold.
256 %cmp.guard = icmp slt i64 0, %N
257 br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
; First loop: zero A[i] for i counting up from 0 while i + 1 < %N.
263 %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
264 %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.04
265 store i32 0, ptr %arrayidx, align 4
266 %inc = add nsw i64 %i.04, 1
267 %cmp = icmp slt i64 %inc, %N
268 br i1 %cmp, label %for.first, label %for.first.exit
271 br label %for.second.guard
; Not referenced by any instruction shown in this function; the assertions
; above expect it in for.first.guard after fusion.
274 %add = add nsw i32 %x, 1
; Second guard: reuses %cmp.guard rather than recomputing the comparison.
275 br i1 %cmp.guard, label %for.second.preheader, label %for.end
277 for.second.preheader:
; Second loop: zero B[j] for j counting up from 0 while j + 1 < %N.
281 %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
282 %arrayidx4 = getelementptr inbounds i32, ptr %B, i64 %j.02
283 store i32 0, ptr %arrayidx4, align 4
284 %inc6 = add nsw i64 %j.02, 1
285 %cmp.j = icmp slt i64 %inc6, %N
286 br i1 %cmp.j, label %for.second, label %for.second.exit
295 ; Test that the incoming block of `%j.lcssa` is updated correctly
296 ; from for.second.guard to for.first.guard, and the two loops for.first and
297 ; for.second are fused.
; Both loop guards branch on the same %cmp.guard. The function returns i64; the
; assertions below expect the returned value to be the %j.lcssa phi.
299 define i64 @updatephi_guardnonloopblock(ptr noalias %A, ptr noalias %B, i64 %N, i32 %x) {
300 ; CHECK-LABEL: @updatephi_guardnonloopblock(
301 ; CHECK-NEXT: for.first.guard:
302 ; CHECK-NEXT: [[CMP_GUARD:%.*]] = icmp slt i64 0, [[N:%.*]]
303 ; CHECK-NEXT: br i1 [[CMP_GUARD]], label [[FOR_FIRST_PREHEADER:%.*]], label [[FOR_END:%.*]]
304 ; CHECK: for.first.preheader:
305 ; CHECK-NEXT: br label [[FOR_FIRST:%.*]]
307 ; CHECK-NEXT: [[I_04:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
308 ; CHECK-NEXT: [[J_02:%.*]] = phi i64 [ [[INC6:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
309 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I_04]]
310 ; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
311 ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_04]], 1
312 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]]
313 ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[J_02]]
314 ; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX4]], align 4
315 ; CHECK-NEXT: [[INC6]] = add nsw i64 [[J_02]], 1
316 ; CHECK-NEXT: [[CMP_J:%.*]] = icmp slt i64 [[INC6]], [[N]]
317 ; CHECK-NEXT: br i1 [[CMP_J]], label [[FOR_FIRST]], label [[FOR_SECOND_EXIT:%.*]]
318 ; CHECK: for.second.exit:
319 ; CHECK-NEXT: br label [[FOR_END]]
321 ; CHECK-NEXT: [[J_LCSSA:%.*]] = phi i64 [ 0, [[FOR_FIRST_GUARD:%.*]] ], [ [[J_02]], [[FOR_SECOND_EXIT]] ]
322 ; CHECK-NEXT: ret i64 [[J_LCSSA]]
; First guard: skip the first loop when 0 < %N does not hold.
325 %cmp.guard = icmp slt i64 0, %N
326 br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
; First loop: zero A[i] for i counting up from 0 while i + 1 < %N.
332 %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
333 %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.04
334 store i32 0, ptr %arrayidx, align 4
335 %inc = add nsw i64 %i.04, 1
336 %cmp = icmp slt i64 %inc, %N
337 br i1 %cmp, label %for.first, label %for.first.exit
340 br label %for.second.guard
; Second guard: reuses %cmp.guard rather than recomputing the comparison.
343 br i1 %cmp.guard, label %for.second.preheader, label %for.end
345 for.second.preheader:
; Second loop: zero B[j] for j counting up from 0 while j + 1 < %N.
349 %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
350 %arrayidx4 = getelementptr inbounds i32, ptr %B, i64 %j.02
351 store i32 0, ptr %arrayidx4, align 4
352 %inc6 = add nsw i64 %j.02, 1
353 %cmp.j = icmp slt i64 %inc6, %N
354 br i1 %cmp.j, label %for.second, label %for.second.exit
; Merges 0 arriving from for.second.guard (second loop skipped) with %j.02
; arriving from for.second.exit. After fusion the assertions above expect the
; 0 to arrive from for.first.guard instead.
360 %j.lcssa = phi i64 [ 0, %for.second.guard ], [ %j.02, %for.second.exit ]
364 define void @pr59024() {
365 ; CHECK-LABEL: @pr59024(
367 ; CHECK-NEXT: br i1 false, label [[FOR_2_PREHEADER:%.*]], label [[FOR_1_PREHEADER:%.*]]
368 ; CHECK: for.1.preheader:
369 ; CHECK-NEXT: br label [[FOR_1:%.*]]
371 ; CHECK-NEXT: br i1 true, label [[FOR_2_PREHEADER_LOOPEXIT:%.*]], label [[FOR_1]]
372 ; CHECK: for.2.preheader.loopexit:
373 ; CHECK-NEXT: br label [[FOR_2_PREHEADER]]
374 ; CHECK: for.2.preheader:
375 ; CHECK-NEXT: br label [[FOR_2:%.*]]
377 ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[FOR_2]]
379 ; CHECK-NEXT: ret void
382 br i1 false, label %for.2, label %for.1
384 for.1: ; preds = %for.1, %entry
385 br i1 true, label %for.2, label %for.1
387 for.2: ; preds = %for.2, %for.1, %entry
388 br i1 true, label %exit, label %for.2
390 exit: ; preds = %for.2