1 ; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
4 define i32 @test1(i32* %p, i1 %C) { ; inputs: pointer %p and branch condition %C
7 br i1 %C, label %block2, label %block3 ; two-way split on %C
12 ; CHECK-NEXT: load i32* %p
26 ; This is a simple phi translation case: the address %P2 below is a phi of %p and %q.
27 define i32 @test2(i32* %p, i32* %q, i1 %C) {
30 br i1 %C, label %block2, label %block3 ; two-way split on %C
35 ; CHECK-NEXT: load i32* %q
42 %P2 = phi i32* [%p, %block3], [%q, %block2] ; %p arrives from %block3, %q from %block2
46 ; CHECK-NEXT: phi i32 [
51 ; This is a PRE case that requires phi translation through a GEP: %P3 below is a GEP of the phi %P2.
52 define i32 @test3(i32* %p, i32* %q, i32** %Hack, i1 %C) {
55 %B = getelementptr i32* %q, i32 1 ; &q[1]
56 store i32* %B, i32** %Hack ; NOTE(review): presumably only here to give %B a use - confirm
57 br i1 %C, label %block2, label %block3 ; two-way split on %C
62 ; CHECK-NEXT: load i32* %B
65 %A = getelementptr i32* %p, i32 1 ; &p[1]
70 %P2 = phi i32* [%p, %block3], [%q, %block2] ; %p arrives from %block3, %q from %block2
71 %P3 = getelementptr i32* %P2, i32 1 ; &P2[1]: same expression as %A when %P2 is %p, as %B when %P2 is %q
75 ; CHECK-NEXT: phi i32 [
80 ;; Here the loaded address is available, but the computation is in 'block3'
81 ;; which does not dominate 'block2'.
82 define i32 @test4(i32* %p, i32* %q, i32** %Hack, i1 %C) {
85 br i1 %C, label %block2, label %block3 ; two-way split on %C
91 ; CHECK: br label %block4
94 %B = getelementptr i32* %q, i32 1 ; &q[1]
95 store i32* %B, i32** %Hack ; NOTE(review): presumably only here to give %B a use - confirm
97 %A = getelementptr i32* %p, i32 1 ; &p[1]
102 %P2 = phi i32* [%p, %block3], [%q, %block2] ; %p arrives from %block3, %q from %block2
103 %P3 = getelementptr i32* %P2, i32 1 ; &P2[1]: same expression as %A when %P2 is %p, as %B when %P2 is %q
107 ; CHECK-NEXT: phi i32 [
112 ;void test5(int N, double *G) {
114 ; for (j = 0; j < N - 1; j++)
115 ; G[j] = G[j] + G[j+1];
118 define void @test5(i32 %N, double* nocapture %G) nounwind ssp {
122 %1 = icmp sgt i32 %0, 0 ; %0 is defined on a line not shown in this excerpt
123 br i1 %1, label %bb.nph, label %return ; go to %bb.nph only when %0 > 0 (signed)
126 %tmp = zext i32 %0 to i64 ; 64-bit bound compared with %tmp6 at the loop exit
130 ; CHECK: load double*
131 ; CHECK: br label %bb
134 %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ] ; 0 on entry, %tmp6 on the back edge
135 %tmp6 = add i64 %indvar, 1
136 %scevgep = getelementptr double* %G, i64 %tmp6 ; &G[%indvar + 1]
137 %scevgep7 = getelementptr double* %G, i64 %indvar ; &G[%indvar]
138 %2 = load double* %scevgep7, align 8
139 %3 = load double* %scevgep, align 8
140 %4 = fadd double %2, %3
141 store double %4, double* %scevgep7, align 8 ; the sum goes back to G[%indvar]
142 %exitcond = icmp eq i64 %tmp6, %tmp ; true once the incremented counter equals %tmp
143 br i1 %exitcond, label %return, label %bb
145 ; Should only be one load in the loop: the G[%indvar] address read by %2 is the G[%indvar + 1] address read by %3 one iteration earlier.
147 ; CHECK: load double*
148 ; CHECK-NOT: load double*
149 ; CHECK: br i1 %exitcond
155 ;void test6(int N, double *G) {
157 ; for (j = 0; j < N - 1; j++)
158 ; G[j+1] = G[j] + G[j+1];
161 define void @test6(i32 %N, double* nocapture %G) nounwind ssp {
165 %1 = icmp sgt i32 %0, 0 ; %0 is defined on a line not shown in this excerpt
166 br i1 %1, label %bb.nph, label %return ; go to %bb.nph only when %0 > 0 (signed)
169 %tmp = zext i32 %0 to i64 ; 64-bit bound compared with %tmp6 at the loop exit
173 ; CHECK: load double*
174 ; CHECK: br label %bb
177 %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ] ; 0 on entry, %tmp6 on the back edge
178 %tmp6 = add i64 %indvar, 1
179 %scevgep = getelementptr double* %G, i64 %tmp6 ; &G[%indvar + 1]
180 %scevgep7 = getelementptr double* %G, i64 %indvar ; &G[%indvar]
181 %2 = load double* %scevgep7, align 8
182 %3 = load double* %scevgep, align 8
183 %4 = fadd double %2, %3
184 store double %4, double* %scevgep, align 8 ; the sum goes to G[%indvar + 1]
185 %exitcond = icmp eq i64 %tmp6, %tmp ; true once the incremented counter equals %tmp
186 br i1 %exitcond, label %return, label %bb
188 ; Should only be one load in the loop: the G[%indvar] address read by %2 is the address the store wrote one iteration earlier.
190 ; CHECK: load double*
191 ; CHECK-NOT: load double*
192 ; CHECK: br i1 %exitcond
198 ;void test7(int N, double* G) {
201 ; for (j = 1; j < N - 1; j++)
202 ; G[j+1] = G[j] + G[j+1];
205 ; This requires phi translation of the adds.
206 define void @test7(i32 %N, double* nocapture %G) nounwind ssp {
208 %0 = getelementptr inbounds double* %G, i64 1 ; &G[1]
209 store double 1.000000e+00, double* %0, align 8 ; G[1] = 1.0 before the loop
211 %2 = icmp sgt i32 %1, 1 ; %1 is defined on a line not shown in this excerpt
212 br i1 %2, label %bb.nph, label %return ; go to %bb.nph only when %1 > 1 (signed)
215 %tmp = sext i32 %1 to i64
216 %tmp7 = add i64 %tmp, -1 ; %tmp - 1, compared with %tmp9 at the loop exit
220 %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ] ; 0 on entry, %tmp9 on the back edge
221 %tmp8 = add i64 %indvar, 2
222 %scevgep = getelementptr double* %G, i64 %tmp8 ; &G[%indvar + 2]
223 %tmp9 = add i64 %indvar, 1
224 %scevgep10 = getelementptr double* %G, i64 %tmp9 ; &G[%indvar + 1]
225 %3 = load double* %scevgep10, align 8
226 %4 = load double* %scevgep, align 8
227 %5 = fadd double %3, %4
228 store double %5, double* %scevgep, align 8 ; the sum goes to G[%indvar + 2]
229 %exitcond = icmp eq i64 %tmp9, %tmp7 ; true once the incremented counter equals %tmp7
230 br i1 %exitcond, label %return, label %bb
232 ; Should only be one load in the loop: %3 reads G[%indvar + 1], which is G[1] on the first iteration and afterwards the address the store wrote one iteration earlier.
234 ; CHECK: load double*
235 ; CHECK-NOT: load double*
236 ; CHECK: br i1 %exitcond
242 ;; Here the loaded address isn't available in 'block2' at all, requiring a new
243 ;; GEP to be inserted into it.
244 define i32 @test8(i32* %p, i32* %q, i32** %Hack, i1 %C) {
247 br i1 %C, label %block2, label %block3 ; two-way split on %C
253 ; CHECK: br label %block4
256 %A = getelementptr i32* %p, i32 1 ; &p[1]; no matching GEP of %q appears in the visible lines
261 %P2 = phi i32* [%p, %block3], [%q, %block2] ; %p arrives from %block3, %q from %block2
262 %P3 = getelementptr i32* %P2, i32 1 ; &P2[1]: same expression as %A when %P2 is %p
266 ; CHECK-NEXT: phi i32 [
271 ;void test9(int N, double* G) {
273 ; for (j = 1; j < N - 1; j++)
274 ; G[j+1] = G[j] + G[j+1];
277 ; This requires phi translation of the adds.
278 define void @test9(i32 %N, double* nocapture %G) nounwind ssp {
282 %2 = icmp sgt i32 %1, 1 ; %1 is defined on a line not shown in this excerpt
283 br i1 %2, label %bb.nph, label %return ; go to %bb.nph only when %1 > 1 (signed)
286 %tmp = sext i32 %1 to i64
287 %tmp7 = add i64 %tmp, -1 ; %tmp - 1, compared with %tmp9 at the loop exit
291 ; CHECK: load double*
292 ; CHECK: br label %bb
295 %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ] ; 0 on entry, %tmp9 on the back edge
296 %tmp8 = add i64 %indvar, 2
297 %scevgep = getelementptr double* %G, i64 %tmp8 ; &G[%indvar + 2]
298 %tmp9 = add i64 %indvar, 1
299 %scevgep10 = getelementptr double* %G, i64 %tmp9 ; &G[%indvar + 1]
300 %3 = load double* %scevgep10, align 8
301 %4 = load double* %scevgep, align 8
302 %5 = fadd double %3, %4
303 store double %5, double* %scevgep, align 8 ; the sum goes to G[%indvar + 2]
304 %exitcond = icmp eq i64 %tmp9, %tmp7 ; true once the incremented counter equals %tmp7
305 br i1 %exitcond, label %return, label %bb
307 ; Should only be one load in the loop: the G[%indvar + 1] address read by %3 is the address the store wrote one iteration earlier.
309 ; CHECK: load double*
310 ; CHECK-NOT: load double*
311 ; CHECK: br i1 %exitcond
317 ;void test10(int N, double* G) {
319 ; for (j = 1; j < N - 1; j++)
320 ; G[j] = G[j] + G[j+1] + G[j-1];
324 define void @test10(i32 %N, double* nocapture %G) nounwind ssp {
327 %1 = icmp sgt i32 %0, 1 ; %0 is defined on a line not shown in this excerpt
328 br i1 %1, label %bb.nph, label %return ; go to %bb.nph only when %0 > 1 (signed)
331 %tmp = sext i32 %0 to i64
332 %tmp8 = add i64 %tmp, -1 ; %tmp - 1, compared with %tmp11 at the loop exit
335 ; CHECK: load double*
336 ; CHECK: load double*
337 ; CHECK: br label %bb
341 %indvar = phi i64 [ 0, %bb.nph ], [ %tmp11, %bb ] ; 0 on entry, %tmp11 on the back edge
342 %scevgep = getelementptr double* %G, i64 %indvar ; &G[%indvar]
343 %tmp9 = add i64 %indvar, 2
344 %scevgep10 = getelementptr double* %G, i64 %tmp9 ; &G[%indvar + 2]
345 %tmp11 = add i64 %indvar, 1
346 %scevgep12 = getelementptr double* %G, i64 %tmp11 ; &G[%indvar + 1]
347 %2 = load double* %scevgep12, align 8
348 %3 = load double* %scevgep10, align 8
349 %4 = fadd double %2, %3
350 %5 = load double* %scevgep, align 8
351 %6 = fadd double %4, %5
352 store double %6, double* %scevgep12, align 8 ; the sum goes to G[%indvar + 1]
353 %exitcond = icmp eq i64 %tmp11, %tmp8 ; true once the incremented counter equals %tmp8
354 br i1 %exitcond, label %return, label %bb
356 ; Should only be one load in the loop: %5's address G[%indvar] is where the store wrote one iteration earlier, and %2's address G[%indvar + 1] is what %3 read one iteration earlier.
358 ; CHECK: load double*
359 ; CHECK-NOT: load double*
360 ; CHECK: br i1 %exitcond
366 ; Test critical edge splitting.
367 define i32 @test11(i32* %p, i1 %C, i32 %N) {
370 br i1 %C, label %block2, label %block3 ; two-way split on %C
373 %cond = icmp sgt i32 %N, 1 ; signed %N > 1
374 br i1 %cond, label %block4, label %block5 ; conditional edge into %block4
375 ; CHECK: load i32* %p
376 ; CHECK-NEXT: br label %block4
387 %ret = phi i32 [ 0, %block2 ], [ %PRE, %block4 ] ; 0 from %block2; %PRE (defined on a line not shown here) from %block4
390 ; CHECK-NEXT: phi i32