1 ; RUN: opt < %s -passes='print<loopnest>' -disable-output 2>&1 | FileCheck %s
3 ; Test a perfect 2-dim loop nest of the form:
8 define void @perf_nest_2D_1(i32** %y, i32** %x, i64 signext %nx, i64 signext %ny) {
9 ; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_2D_1_loop_j, Loops: ( perf_nest_2D_1_loop_j )
10 ; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_2D_1_loop_i, Loops: ( perf_nest_2D_1_loop_i perf_nest_2D_1_loop_j )
12 br label %perf_nest_2D_1_loop_i
14 perf_nest_2D_1_loop_i:
15 %i = phi i64 [ 0, %entry ], [ %inc13, %inc_i ]
16 %cmp21 = icmp slt i64 0, %ny
17 br i1 %cmp21, label %perf_nest_2D_1_loop_j, label %inc_i
19 perf_nest_2D_1_loop_j:
20 %j = phi i64 [ 0, %perf_nest_2D_1_loop_i ], [ %inc, %inc_j ]
21 %arrayidx = getelementptr inbounds i32*, i32** %x, i64 %j
22 %0 = load i32*, i32** %arrayidx, align 8
23 %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %j
24 %1 = load i32, i32* %arrayidx6, align 4
25 %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %j
26 %2 = load i32*, i32** %arrayidx8, align 8
27 %arrayidx11 = getelementptr inbounds i32, i32* %2, i64 %i
28 store i32 %1, i32* %arrayidx11, align 4
32 %inc = add nsw i64 %j, 1
33 %cmp2 = icmp slt i64 %inc, %ny
34 br i1 %cmp2, label %perf_nest_2D_1_loop_j, label %inc_i
37 %inc13 = add nsw i64 %i, 1
38 %cmp = icmp slt i64 %inc13, %nx
39 br i1 %cmp, label %perf_nest_2D_1_loop_i, label %perf_nest_2D_1_loop_i_end
41 perf_nest_2D_1_loop_i_end:
45 ; Test a perfect 2-dim loop nest of the form:
46 ; for (i=0; i<100; ++i)
47 ; for (j=0; j<100; ++j)
49 define void @perf_nest_2D_2(i32** %y, i32** %x) {
50 ; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_2D_2_loop_j, Loops: ( perf_nest_2D_2_loop_j )
51 ; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_2D_2_loop_i, Loops: ( perf_nest_2D_2_loop_i perf_nest_2D_2_loop_j )
53 br label %perf_nest_2D_2_loop_i
55 perf_nest_2D_2_loop_i:
56 %i = phi i64 [ 0, %entry ], [ %inc13, %inc_i ]
57 br label %perf_nest_2D_2_loop_j
59 perf_nest_2D_2_loop_j:
60 %j = phi i64 [ 0, %perf_nest_2D_2_loop_i ], [ %inc, %inc_j ]
61 %arrayidx = getelementptr inbounds i32*, i32** %x, i64 %j
62 %0 = load i32*, i32** %arrayidx, align 8
63 %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %j
64 %1 = load i32, i32* %arrayidx6, align 4
65 %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %j
66 %2 = load i32*, i32** %arrayidx8, align 8
67 %arrayidx11 = getelementptr inbounds i32, i32* %2, i64 %i
68 store i32 %1, i32* %arrayidx11, align 4
72 %inc = add nsw i64 %j, 1
73 %cmp2 = icmp slt i64 %inc, 100
74 br i1 %cmp2, label %perf_nest_2D_2_loop_j, label %loop_j_end
80 %inc13 = add nsw i64 %i, 1
81 %cmp = icmp slt i64 %inc13, 100
82 br i1 %cmp, label %perf_nest_2D_2_loop_i, label %perf_nest_2D_2_loop_i_end
84 perf_nest_2D_2_loop_i_end:
88 define void @perf_nest_2D_3(i32** %y, i32** %x, i64 signext %nx, i64 signext %ny) {
89 ; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_2D_3_loop_j, Loops: ( perf_nest_2D_3_loop_j )
90 ; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_2D_3_loop_i, Loops: ( perf_nest_2D_3_loop_i perf_nest_2D_3_loop_j )
92 br label %perf_nest_2D_3_loop_i
94 perf_nest_2D_3_loop_i:
95 %i = phi i64 [ 0, %entry ], [ %inc13, %inc_i ]
96 %cmp21 = icmp slt i64 0, %ny
100 br i1 %cmp21, label %preheader.j, label %for.end
103 br label %perf_nest_2D_3_loop_j
105 perf_nest_2D_3_loop_j:
106 %j = phi i64 [ 0, %preheader.j ], [ %inc, %inc_j ]
107 %arrayidx = getelementptr inbounds i32*, i32** %x, i64 %j
108 %0 = load i32*, i32** %arrayidx, align 8
109 %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %j
110 %1 = load i32, i32* %arrayidx6, align 4
111 %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %j
112 %2 = load i32*, i32** %arrayidx8, align 8
113 %arrayidx11 = getelementptr inbounds i32, i32* %2, i64 %i
114 store i32 %1, i32* %arrayidx11, align 4
118 %inc = add nsw i64 %j, 1
119 %cmp2 = icmp slt i64 %inc, %ny
120 br i1 %cmp2, label %perf_nest_2D_3_loop_j, label %for.exit
129 %inc13 = add nsw i64 %i, 1
130 %cmp = icmp slt i64 %inc13, %nx
131 br i1 %cmp, label %perf_nest_2D_3_loop_i, label %perf_nest_2D_3_loop_i_end
133 perf_nest_2D_3_loop_i_end:
137 ; Test a perfect 3-dim loop nest of the form:
138 ; for (i=0; i<nx; ++i)
139 ; for (j=0; j<ny; ++j)
140 ; for (k=0; j<nk; ++k)
141 ; y[j][j][k] = x[i][j][k];
144 define void @perf_nest_3D_1(i32*** %y, i32*** %x, i32 signext %nx, i32 signext %ny, i32 signext %nk) {
145 ; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_3D_1_loop_k, Loops: ( perf_nest_3D_1_loop_k )
146 ; CHECK-NEXT: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_3D_1_loop_j, Loops: ( perf_nest_3D_1_loop_j perf_nest_3D_1_loop_k )
147 ; CHECK-NEXT: IsPerfect=true, Depth=3, OutermostLoop: perf_nest_3D_1_loop_i, Loops: ( perf_nest_3D_1_loop_i perf_nest_3D_1_loop_j perf_nest_3D_1_loop_k )
149 br label %perf_nest_3D_1_loop_i
151 perf_nest_3D_1_loop_i:
152 %i = phi i32 [ 0, %entry ], [ %inci, %for.inci ]
153 %cmp21 = icmp slt i32 0, %ny
154 br i1 %cmp21, label %perf_nest_3D_1_loop_j, label %for.inci
156 perf_nest_3D_1_loop_j:
157 %j = phi i32 [ 0, %perf_nest_3D_1_loop_i ], [ %incj, %for.incj ]
158 %cmp22 = icmp slt i32 0, %nk
159 br i1 %cmp22, label %perf_nest_3D_1_loop_k, label %for.incj
161 perf_nest_3D_1_loop_k:
162 %k = phi i32 [ 0, %perf_nest_3D_1_loop_j ], [ %inck, %for.inck ]
163 %idxprom = sext i32 %i to i64
164 %arrayidx = getelementptr inbounds i32**, i32*** %x, i64 %idxprom
165 %0 = load i32**, i32*** %arrayidx, align 8
166 %idxprom7 = sext i32 %j to i64
167 %arrayidx8 = getelementptr inbounds i32*, i32** %0, i64 %idxprom7
168 %1 = load i32*, i32** %arrayidx8, align 8
169 %idxprom9 = sext i32 %k to i64
170 %arrayidx10 = getelementptr inbounds i32, i32* %1, i64 %idxprom9
171 %2 = load i32, i32* %arrayidx10, align 4
172 %idxprom11 = sext i32 %j to i64
173 %arrayidx12 = getelementptr inbounds i32**, i32*** %y, i64 %idxprom11
174 %3 = load i32**, i32*** %arrayidx12, align 8
175 %idxprom13 = sext i32 %j to i64
176 %arrayidx14 = getelementptr inbounds i32*, i32** %3, i64 %idxprom13
177 %4 = load i32*, i32** %arrayidx14, align 8
178 %idxprom15 = sext i32 %k to i64
179 %arrayidx16 = getelementptr inbounds i32, i32* %4, i64 %idxprom15
180 store i32 %2, i32* %arrayidx16, align 4
184 %inck = add nsw i32 %k, 1
185 %cmp5 = icmp slt i32 %inck, %nk
186 br i1 %cmp5, label %perf_nest_3D_1_loop_k, label %for.incj
189 %incj = add nsw i32 %j, 1
190 %cmp2 = icmp slt i32 %incj, %ny
191 br i1 %cmp2, label %perf_nest_3D_1_loop_j, label %for.inci
194 %inci = add nsw i32 %i, 1
195 %cmp = icmp slt i32 %inci, %nx
196 br i1 %cmp, label %perf_nest_3D_1_loop_i, label %perf_nest_3D_1_loop_i_end
198 perf_nest_3D_1_loop_i_end:
202 ; Test a perfect 3-dim loop nest of the form:
203 ; for (i=0; i<100; ++i)
204 ; for (j=0; j<100; ++j)
205 ; for (k=0; j<100; ++k)
206 ; y[j][j][k] = x[i][j][k];
209 define void @perf_nest_3D_2(i32*** %y, i32*** %x) {
210 ; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_3D_2_loop_k, Loops: ( perf_nest_3D_2_loop_k )
211 ; CHECK-NEXT: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_3D_2_loop_j, Loops: ( perf_nest_3D_2_loop_j perf_nest_3D_2_loop_k )
212 ; CHECK-NEXT: IsPerfect=true, Depth=3, OutermostLoop: perf_nest_3D_2_loop_i, Loops: ( perf_nest_3D_2_loop_i perf_nest_3D_2_loop_j perf_nest_3D_2_loop_k )
214 br label %perf_nest_3D_2_loop_i
216 perf_nest_3D_2_loop_i:
217 %i = phi i32 [ 0, %entry ], [ %inci, %for.inci ]
218 br label %perf_nest_3D_2_loop_j
220 perf_nest_3D_2_loop_j:
221 %j = phi i32 [ 0, %perf_nest_3D_2_loop_i ], [ %incj, %for.incj ]
222 br label %perf_nest_3D_2_loop_k
224 perf_nest_3D_2_loop_k:
225 %k = phi i32 [ 0, %perf_nest_3D_2_loop_j ], [ %inck, %for.inck ]
226 %idxprom = sext i32 %i to i64
227 %arrayidx = getelementptr inbounds i32**, i32*** %x, i64 %idxprom
228 %0 = load i32**, i32*** %arrayidx, align 8
229 %idxprom7 = sext i32 %j to i64
230 %arrayidx8 = getelementptr inbounds i32*, i32** %0, i64 %idxprom7
231 %1 = load i32*, i32** %arrayidx8, align 8
232 %idxprom9 = sext i32 %k to i64
233 %arrayidx10 = getelementptr inbounds i32, i32* %1, i64 %idxprom9
234 %2 = load i32, i32* %arrayidx10, align 4
235 %idxprom11 = sext i32 %j to i64
236 %arrayidx12 = getelementptr inbounds i32**, i32*** %y, i64 %idxprom11
237 %3 = load i32**, i32*** %arrayidx12, align 8
238 %idxprom13 = sext i32 %j to i64
239 %arrayidx14 = getelementptr inbounds i32*, i32** %3, i64 %idxprom13
240 %4 = load i32*, i32** %arrayidx14, align 8
241 %idxprom15 = sext i32 %k to i64
242 %arrayidx16 = getelementptr inbounds i32, i32* %4, i64 %idxprom15
243 store i32 %2, i32* %arrayidx16, align 4
247 %inck = add nsw i32 %k, 1
248 %cmp5 = icmp slt i32 %inck, 100
249 br i1 %cmp5, label %perf_nest_3D_2_loop_k, label %loop_k_end
255 %incj = add nsw i32 %j, 1
256 %cmp2 = icmp slt i32 %incj, 100
257 br i1 %cmp2, label %perf_nest_3D_2_loop_j, label %loop_j_end
263 %inci = add nsw i32 %i, 1
264 %cmp = icmp slt i32 %inci, 100
265 br i1 %cmp, label %perf_nest_3D_2_loop_i, label %perf_nest_3D_2_loop_i_end
267 perf_nest_3D_2_loop_i_end:
271 ; Test a perfect loop nest with a live out reduction:
272 ; for (i = 0; i<ni; ++i)
273 ; if (0<nj) { // guard branch for the j-loop
274 ; for (j=0; j<nj; j+=1)
279 define signext i32 @perf_nest_live_out(i32 signext %x, i32 signext %ni, i32 signext %nj) {
280 ; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_live_out_loop_j, Loops: ( perf_nest_live_out_loop_j )
281 ; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_live_out_loop_i, Loops: ( perf_nest_live_out_loop_i perf_nest_live_out_loop_j )
283 %cmp4 = icmp slt i32 0, %ni
284 br i1 %cmp4, label %perf_nest_live_out_loop_i.lr.ph, label %for.end7
286 perf_nest_live_out_loop_i.lr.ph:
287 br label %perf_nest_live_out_loop_i
289 perf_nest_live_out_loop_i:
290 %x.addr.06 = phi i32 [ %x, %perf_nest_live_out_loop_i.lr.ph ], [ %x.addr.1.lcssa, %for.inc5 ]
291 %i.05 = phi i32 [ 0, %perf_nest_live_out_loop_i.lr.ph ], [ %inc6, %for.inc5 ]
292 %cmp21 = icmp slt i32 0, %nj
293 br i1 %cmp21, label %perf_nest_live_out_loop_j.lr.ph, label %for.inc5
295 perf_nest_live_out_loop_j.lr.ph:
296 br label %perf_nest_live_out_loop_j
298 perf_nest_live_out_loop_j:
299 %x.addr.13 = phi i32 [ %x.addr.06, %perf_nest_live_out_loop_j.lr.ph ], [ %add4, %perf_nest_live_out_loop_j ]
300 %j.02 = phi i32 [ 0, %perf_nest_live_out_loop_j.lr.ph ], [ %inc, %perf_nest_live_out_loop_j ]
301 %add = add nsw i32 %i.05, %j.02
302 %add4 = add nsw i32 %x.addr.13, %add
303 %inc = add nsw i32 %j.02, 1
304 %cmp2 = icmp slt i32 %inc, %nj
305 br i1 %cmp2, label %perf_nest_live_out_loop_j, label %for.cond1.for.inc5_crit_edge
307 for.cond1.for.inc5_crit_edge:
308 %split = phi i32 [ %add4, %perf_nest_live_out_loop_j ]
312 %x.addr.1.lcssa = phi i32 [ %split, %for.cond1.for.inc5_crit_edge ], [ %x.addr.06, %perf_nest_live_out_loop_i ]
313 %inc6 = add nsw i32 %i.05, 1
314 %cmp = icmp slt i32 %inc6, %ni
315 br i1 %cmp, label %perf_nest_live_out_loop_i, label %for.cond.for.end7_crit_edge
317 for.cond.for.end7_crit_edge:
318 %split7 = phi i32 [ %x.addr.1.lcssa, %for.inc5 ]
322 %x.addr.0.lcssa = phi i32 [ %split7, %for.cond.for.end7_crit_edge ], [ %x, %entry ]
323 ret i32 %x.addr.0.lcssa
326 ; Test a perfect loop nest of the form:
327 ; for (int i = 0; i < nx; ++i)
328 ; if (i < ny) { // guard branch for the j-loop
329 ; for (int j=i; j < ny; j+=1)
330 ; y[j][i] = x[i][j] + j;
332 define double @perf_nest_guard_branch(i32** %y, i32** %x, i32 signext %nx, i32 signext %ny) {
333 ; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: test6Loop2, Loops: ( test6Loop2 )
334 ; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: test6Loop1, Loops: ( test6Loop1 test6Loop2 )
336 %cmp2 = icmp slt i32 0, %nx
337 br i1 %cmp2, label %test6Loop1.lr.ph, label %for.end13
339 test6Loop1.lr.ph: ; preds = %entry
342 test6Loop1: ; preds = %test6Loop1.lr.ph, %for.inc11
343 %i.0 = phi i32 [ 0, %test6Loop1.lr.ph ], [ %inc12, %for.inc11 ]
344 %cmp1 = icmp slt i32 %i.0, %ny
345 br i1 %cmp1, label %test6Loop2.lr.ph, label %if.end
347 test6Loop2.lr.ph: ; preds = %if.then
350 test6Loop2: ; preds = %test6Loop2.lr.ph, %for.inc
351 %j.0 = phi i32 [ %i.0, %test6Loop2.lr.ph ], [ %inc, %for.inc ]
352 %idxprom = sext i32 %i.0 to i64
353 %arrayidx = getelementptr inbounds i32*, i32** %x, i64 %idxprom
354 %0 = load i32*, i32** %arrayidx, align 8
355 %idxprom5 = sext i32 %j.0 to i64
356 %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %idxprom5
357 %1 = load i32, i32* %arrayidx6, align 4
358 %add = add nsw i32 %1, %j.0
359 %idxprom7 = sext i32 %j.0 to i64
360 %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %idxprom7
361 %2 = load i32*, i32** %arrayidx8, align 8
362 %idxprom9 = sext i32 %i.0 to i64
363 %arrayidx10 = getelementptr inbounds i32, i32* %2, i64 %idxprom9
364 store i32 %add, i32* %arrayidx10, align 4
367 for.inc: ; preds = %test6Loop2
368 %inc = add nsw i32 %j.0, 1
369 %cmp3 = icmp slt i32 %inc, %ny
370 br i1 %cmp3, label %test6Loop2, label %for.cond2.for.end_crit_edge
372 for.cond2.for.end_crit_edge: ; preds = %for.inc
375 for.end: ; preds = %for.cond2.for.end_crit_edge, %if.then
378 if.end: ; preds = %for.end, %test6Loop1
381 for.inc11: ; preds = %if.end
382 %inc12 = add nsw i32 %i.0, 1
383 %cmp = icmp slt i32 %inc12, %nx
384 br i1 %cmp, label %test6Loop1, label %for.cond.for.end13_crit_edge
386 for.cond.for.end13_crit_edge: ; preds = %for.inc11
389 for.end13: ; preds = %for.cond.for.end13_crit_edge, %entry
390 %arrayidx14 = getelementptr inbounds i32*, i32** %y, i64 0
391 %3 = load i32*, i32** %arrayidx14, align 8
392 %arrayidx15 = getelementptr inbounds i32, i32* %3, i64 0
393 %4 = load i32, i32* %arrayidx15, align 4
394 %conv = sitofp i32 %4 to double
398 ; Test a perfect loop nest of the form:
399 ; for (int i = 0; i < nx; ++i)
400 ; if (i < ny) { // guard branch for the j-loop
401 ; for (int j=i; j < ny; j+=1)
402 ; y[j][i] = x[i][j] + j;
405 define double @test6(i32** %y, i32** %x, i32 signext %nx, i32 signext %ny) {
406 ; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: test6Loop2, Loops: ( test6Loop2 )
407 ; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: test6Loop1, Loops: ( test6Loop1 test6Loop2 )
409 %cmp2 = icmp slt i32 0, %nx
410 br i1 %cmp2, label %test6Loop1.lr.ph, label %for.end13
412 test6Loop1.lr.ph: ; preds = %entry
415 test6Loop1: ; preds = %test6Loop1.lr.ph, %for.inc11
416 %i.0 = phi i32 [ 0, %test6Loop1.lr.ph ], [ %inc12, %for.inc11 ]
417 %cmp1 = icmp slt i32 %i.0, %ny
418 br i1 %cmp1, label %test6Loop2.lr.ph, label %if.end
420 test6Loop2.lr.ph: ; preds = %if.then
423 test6Loop2: ; preds = %test6Loop2.lr.ph, %for.inc
424 %j.0 = phi i32 [ %i.0, %test6Loop2.lr.ph ], [ %inc, %for.inc ]
425 %idxprom = sext i32 %i.0 to i64
426 %arrayidx = getelementptr inbounds i32*, i32** %x, i64 %idxprom
427 %0 = load i32*, i32** %arrayidx, align 8
428 %idxprom5 = sext i32 %j.0 to i64
429 %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %idxprom5
430 %1 = load i32, i32* %arrayidx6, align 4
431 %add = add nsw i32 %1, %j.0
432 %idxprom7 = sext i32 %j.0 to i64
433 %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %idxprom7
434 %2 = load i32*, i32** %arrayidx8, align 8
435 %idxprom9 = sext i32 %i.0 to i64
436 %arrayidx10 = getelementptr inbounds i32, i32* %2, i64 %idxprom9
437 store i32 %add, i32* %arrayidx10, align 4
440 for.inc: ; preds = %test6Loop2
441 %inc = add nsw i32 %j.0, 1
442 %cmp3 = icmp slt i32 %inc, %ny
443 br i1 %cmp3, label %test6Loop2, label %for.cond2.for.end_crit_edge
445 for.cond2.for.end_crit_edge: ; preds = %for.inc
448 for.end: ; preds = %for.cond2.for.end_crit_edge, %if.then
451 if.end: ; preds = %for.end, %test6Loop1
454 for.inc11: ; preds = %if.end
455 %inc12 = add nsw i32 %i.0, 1
456 %cmp = icmp slt i32 %inc12, %nx
457 br i1 %cmp, label %test6Loop1, label %for.cond.for.end13_crit_edge
459 for.cond.for.end13_crit_edge: ; preds = %for.inc11
462 for.end13: ; preds = %for.cond.for.end13_crit_edge, %entry
463 %arrayidx14 = getelementptr inbounds i32*, i32** %y, i64 0
464 %3 = load i32*, i32** %arrayidx14, align 8
465 %arrayidx15 = getelementptr inbounds i32, i32* %3, i64 0
466 %4 = load i32, i32* %arrayidx15, align 4
467 %conv = sitofp i32 %4 to double