1 ; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
; @test1: branches on %C to %block2/%block3; %PRE later loads from %p.
; NOTE(review): only part of this function is visible in this excerpt. The
; FileCheck directive below requires `load i32, i32* %p` on the output line
; right after the previously matched one -- presumably the load that load-PRE
; places in a predecessor; confirm against the full test.
4 define i32 @test1(i32* %p, i1 %C) {
7 br i1 %C, label %block2, label %block3
12 ; CHECK-NEXT: load i32, i32* %p
19 %PRE = load i32, i32* %p ; load of the same address the directive above names
26 ; This is a simple phi translation case: the loaded pointer %P2 is a phi of
; %p (incoming from %block3) and %q (incoming from %block2), so the address
; differs per predecessor. The directives expect a load of %q and, later, an
; i32 phi.
27 define i32 @test2(i32* %p, i32* %q, i1 %C) {
28 ; CHECK-LABEL: @test2(
30 br i1 %C, label %block2, label %block3
35 ; CHECK-NEXT: load i32, i32* %q
42 %P2 = phi i32* [%p, %block3], [%q, %block2] ; address depends on the incoming edge
43 %PRE = load i32, i32* %P2 ; load through the phi'd pointer
46 ; CHECK-NEXT: phi i32 [
51 ; This is a PRE case that requires phi translation through a GEP.
; The final load goes through %P3, a GEP (index 1) off the phi %P2. The
; directive expects a load from %B, which is the index-1 GEP of %q computed
; before the branch.
52 define i32 @test3(i32* %p, i32* %q, i32** %Hack, i1 %C) {
53 ; CHECK-LABEL: @test3(
55 %B = getelementptr i32, i32* %q, i32 1 ; &q[1]
56 store i32* %B, i32** %Hack ; gives %B a use via %Hack
57 br i1 %C, label %block2, label %block3
62 ; CHECK-NEXT: load i32, i32* %B
65 %A = getelementptr i32, i32* %p, i32 1 ; &p[1]
70 %P2 = phi i32* [%p, %block3], [%q, %block2]
71 %P3 = getelementptr i32, i32* %P2, i32 1 ; &P2[1]: &p[1] or &q[1] depending on the edge
72 %PRE = load i32, i32* %P3
80 ;; Here the loaded address is available, but the computation is in 'block3'
81 ;; which does not dominate 'block2'.
;; The directives below only require that some i32 load is followed by
;; `br label %block4`; they do not pin the address operand.
82 define i32 @test4(i32* %p, i32* %q, i32** %Hack, i1 %C) {
83 ; CHECK-LABEL: @test4(
85 br i1 %C, label %block2, label %block3
90 ; CHECK: load i32, i32*
91 ; CHECK: br label %block4
94 %B = getelementptr i32, i32* %q, i32 1 ; &q[1]
95 store i32* %B, i32** %Hack ; gives %B a use via %Hack
97 %A = getelementptr i32, i32* %p, i32 1 ; &p[1]
102 %P2 = phi i32* [%p, %block3], [%q, %block2]
103 %P3 = getelementptr i32, i32* %P2, i32 1 ; &P2[1]: &p[1] or &q[1] depending on the edge
104 %PRE = load i32, i32* %P3
112 ;void test5(int N, double *G) {
114 ; for (j = 0; j < N - 1; j++)
115 ; G[j] = G[j] + G[j+1];
; In the IR below %indvar plays the role of j: the loop loads G[j] and G[j+1],
; adds them and stores the sum back to G[j]. The directives expect one double
; load ahead of `br label %bb` and exactly one double load before the loop's
; exit branch.
118 define void @test5(i32 %N, double* nocapture %G) nounwind ssp {
119 ; CHECK-LABEL: @test5(
122 %1 = icmp sgt i32 %0, 0
123 br i1 %1, label %bb.nph, label %return
126 %tmp = zext i32 %0 to i64 ; bound compared against %tmp6 at the loop exit
130 ; CHECK: load double, double*
131 ; CHECK: br label %bb
134 %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ] ; j
135 %tmp6 = add i64 %indvar, 1 ; j + 1
136 %scevgep = getelementptr double, double* %G, i64 %tmp6 ; &G[j+1]
137 %scevgep7 = getelementptr double, double* %G, i64 %indvar ; &G[j]
138 %2 = load double, double* %scevgep7, align 8 ; G[j]
139 %3 = load double, double* %scevgep, align 8 ; G[j+1]
140 %4 = fadd double %2, %3
141 store double %4, double* %scevgep7, align 8 ; G[j] = G[j] + G[j+1]
142 %exitcond = icmp eq i64 %tmp6, %tmp
143 br i1 %exitcond, label %return, label %bb
145 ; Should only be one load in the loop.
147 ; CHECK: load double, double*
148 ; CHECK-NOT: load double, double*
149 ; CHECK: br i1 %exitcond
155 ;void test6(int N, double *G) {
157 ; for (j = 0; j < N - 1; j++)
158 ; G[j+1] = G[j] + G[j+1];
; In the IR below %indvar plays the role of j: the loop loads G[j] and G[j+1],
; adds them and stores the sum to G[j+1]. The directives expect one double
; load ahead of `br label %bb` and exactly one double load before the loop's
; exit branch.
161 define void @test6(i32 %N, double* nocapture %G) nounwind ssp {
162 ; CHECK-LABEL: @test6(
165 %1 = icmp sgt i32 %0, 0
166 br i1 %1, label %bb.nph, label %return
169 %tmp = zext i32 %0 to i64 ; bound compared against %tmp6 at the loop exit
173 ; CHECK: load double, double*
174 ; CHECK: br label %bb
177 %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ] ; j
178 %tmp6 = add i64 %indvar, 1 ; j + 1
179 %scevgep = getelementptr double, double* %G, i64 %tmp6 ; &G[j+1]
180 %scevgep7 = getelementptr double, double* %G, i64 %indvar ; &G[j]
181 %2 = load double, double* %scevgep7, align 8 ; G[j]
182 %3 = load double, double* %scevgep, align 8 ; G[j+1]
183 %4 = fadd double %2, %3
184 store double %4, double* %scevgep, align 8 ; G[j+1] = G[j] + G[j+1]
185 %exitcond = icmp eq i64 %tmp6, %tmp
186 br i1 %exitcond, label %return, label %bb
188 ; Should only be one load in the loop.
190 ; CHECK: load double, double*
191 ; CHECK-NOT: load double, double*
192 ; CHECK: br i1 %exitcond
198 ;void test7(int N, double* G) {
201 ; for (j = 1; j < N - 1; j++)
202 ; G[j+1] = G[j] + G[j+1];
205 ; This requires phi translation of the adds.
; %indvar starts at 0, so the C-level j corresponds to %indvar + 1: the loop
; loads G[%indvar+1] and G[%indvar+2] and stores their sum to G[%indvar+2].
; G[1] is set to 1.0 before the loop.
206 define void @test7(i32 %N, double* nocapture %G) nounwind ssp {
208 %0 = getelementptr inbounds double, double* %G, i64 1 ; &G[1]
209 store double 1.000000e+00, double* %0, align 8 ; G[1] = 1.0
211 %2 = icmp sgt i32 %1, 1
212 br i1 %2, label %bb.nph, label %return
215 %tmp = sext i32 %1 to i64
216 %tmp7 = add i64 %tmp, -1 ; bound compared against %tmp9 at the loop exit
220 %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
221 %tmp8 = add i64 %indvar, 2
222 %scevgep = getelementptr double, double* %G, i64 %tmp8 ; &G[indvar+2]
223 %tmp9 = add i64 %indvar, 1
224 %scevgep10 = getelementptr double, double* %G, i64 %tmp9 ; &G[indvar+1]
225 %3 = load double, double* %scevgep10, align 8 ; G[indvar+1]
226 %4 = load double, double* %scevgep, align 8 ; G[indvar+2]
227 %5 = fadd double %3, %4
228 store double %5, double* %scevgep, align 8 ; G[indvar+2] = sum
229 %exitcond = icmp eq i64 %tmp9, %tmp7
230 br i1 %exitcond, label %return, label %bb
232 ; Should only be one load in the loop.
234 ; CHECK: load double, double*
235 ; CHECK-NOT: load double, double*
236 ; CHECK: br i1 %exitcond
242 ;; Here the loaded address isn't available in 'block2' at all, requiring a new
243 ;; GEP to be inserted into it.
;; The directives below only require that some i32 load is followed by
;; `br label %block4`; they do not pin the address operand.
244 define i32 @test8(i32* %p, i32* %q, i32** %Hack, i1 %C) {
245 ; CHECK-LABEL: @test8(
247 br i1 %C, label %block2, label %block3
252 ; CHECK: load i32, i32*
253 ; CHECK: br label %block4
256 %A = getelementptr i32, i32* %p, i32 1 ; &p[1]
261 %P2 = phi i32* [%p, %block3], [%q, %block2]
262 %P3 = getelementptr i32, i32* %P2, i32 1 ; &P2[1]: &p[1] or &q[1] depending on the edge
263 %PRE = load i32, i32* %P3
271 ;void test9(int N, double* G) {
273 ; for (j = 1; j < N - 1; j++)
274 ; G[j+1] = G[j] + G[j+1];
277 ; This requires phi translation of the adds.
; %indvar starts at 0, so the C-level j corresponds to %indvar + 1: the loop
; loads G[%indvar+1] and G[%indvar+2] and stores their sum to G[%indvar+2].
; The directives expect one double load ahead of `br label %bb` and exactly
; one double load before the loop's exit branch.
278 define void @test9(i32 %N, double* nocapture %G) nounwind ssp {
282 %2 = icmp sgt i32 %1, 1
283 br i1 %2, label %bb.nph, label %return
286 %tmp = sext i32 %1 to i64
287 %tmp7 = add i64 %tmp, -1 ; bound compared against %tmp9 at the loop exit
291 ; CHECK: load double, double*
292 ; CHECK: br label %bb
295 %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
296 %tmp8 = add i64 %indvar, 2
297 %scevgep = getelementptr double, double* %G, i64 %tmp8 ; &G[indvar+2]
298 %tmp9 = add i64 %indvar, 1
299 %scevgep10 = getelementptr double, double* %G, i64 %tmp9 ; &G[indvar+1]
300 %3 = load double, double* %scevgep10, align 8 ; G[indvar+1]
301 %4 = load double, double* %scevgep, align 8 ; G[indvar+2]
302 %5 = fadd double %3, %4
303 store double %5, double* %scevgep, align 8 ; G[indvar+2] = sum
304 %exitcond = icmp eq i64 %tmp9, %tmp7
305 br i1 %exitcond, label %return, label %bb
307 ; Should only be one load in the loop.
309 ; CHECK: load double, double*
310 ; CHECK-NOT: load double, double*
311 ; CHECK: br i1 %exitcond
317 ;void test10(int N, double* G) {
319 ; for (j = 1; j < N - 1; j++)
320 ; G[j] = G[j] + G[j+1] + G[j-1];
; %indvar starts at 0, so the C-level j corresponds to %indvar + 1: the loop
; reads G[%indvar], G[%indvar+1] and G[%indvar+2] and stores the sum to
; G[%indvar+1]. The directives expect two double loads ahead of
; `br label %bb` and exactly one double load before the loop's exit branch.
324 define void @test10(i32 %N, double* nocapture %G) nounwind ssp {
327 %1 = icmp sgt i32 %0, 1
328 br i1 %1, label %bb.nph, label %return
331 %tmp = sext i32 %0 to i64
332 %tmp8 = add i64 %tmp, -1 ; bound compared against %tmp11 at the loop exit
335 ; CHECK: load double, double*
336 ; CHECK: load double, double*
337 ; CHECK: br label %bb
341 %indvar = phi i64 [ 0, %bb.nph ], [ %tmp11, %bb ]
342 %scevgep = getelementptr double, double* %G, i64 %indvar ; &G[indvar]
343 %tmp9 = add i64 %indvar, 2
344 %scevgep10 = getelementptr double, double* %G, i64 %tmp9 ; &G[indvar+2]
345 %tmp11 = add i64 %indvar, 1
346 %scevgep12 = getelementptr double, double* %G, i64 %tmp11 ; &G[indvar+1]
347 %2 = load double, double* %scevgep12, align 8 ; G[indvar+1]
348 %3 = load double, double* %scevgep10, align 8 ; G[indvar+2]
349 %4 = fadd double %2, %3
350 %5 = load double, double* %scevgep, align 8 ; G[indvar]
351 %6 = fadd double %4, %5
352 store double %6, double* %scevgep12, align 8 ; G[indvar+1] = sum of all three
353 %exitcond = icmp eq i64 %tmp11, %tmp8
354 br i1 %exitcond, label %return, label %bb
356 ; Should only be one load in the loop.
358 ; CHECK: load double, double*
359 ; CHECK-NOT: load double, double*
360 ; CHECK: br i1 %exitcond
366 ; Test critical edge splitting.
; The directives expect a load of %p immediately followed by
; `br label %block4`. NOTE(review): presumably that pair forms the block
; created by splitting an edge into %block4 -- confirm against the full test.
367 define i32 @test11(i32* %p, i1 %C, i32 %N) {
368 ; CHECK-LABEL: @test11(
370 br i1 %C, label %block2, label %block3
373 %cond = icmp sgt i32 %N, 1
374 br i1 %cond, label %block4, label %block5 ; two-way branch with %block4 as one target
375 ; CHECK: load i32, i32* %p
376 ; CHECK-NEXT: br label %block4
383 %PRE = load i32, i32* %p
387 %ret = phi i32 [ 0, %block2 ], [ %PRE, %block4 ] ; 0 from %block2, the loaded value from %block4
390 ; CHECK-NEXT: phi i32
; Personality routine referenced by @test12 below.
395 declare i32 @__CxxFrameHandler3(...)
397 ; Test that loads aren't PRE'd into EH pads.
; The directives require %NOTPRE to stay on the line right after the
; `%c2 = cleanuppad`, i.e. the load is expected to remain where it is written.
398 define void @test12(i32* %p) personality i32 (...)* @__CxxFrameHandler3 {
399 ; CHECK-LABEL: @test12(
402 to label %block2 unwind label %catch.dispatch
406 to label %block3 unwind label %cleanup
412 %cs1 = catchswitch within none [label %catch] unwind label %cleanup2
415 %c = catchpad within %cs1 []
416 catchret from %c to label %block2
419 %c1 = cleanuppad within none []
421 cleanupret from %c1 unwind label %cleanup2
425 ; CHECK-NEXT: %c2 = cleanuppad within none []
426 ; CHECK-NEXT: %NOTPRE = load i32, i32* %p
428 %c2 = cleanuppad within none []
429 %NOTPRE = load i32, i32* %p ; the load that must not be hoisted
430 call void @g(i32 %NOTPRE)
431 cleanupret from %c2 unwind to caller
434 ; Don't PRE load across potentially throwing calls.
; %x is loaded once as %uu (and copied to %r) and again as %vv. The directives
; require a load i32 to remain directly after the call to @f in the output.
436 define i32 @test13(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {
438 ; CHECK-LABEL: @test13(
440 ; CHECK-NEXT: icmp eq
444 %tobool = icmp eq i32 %a, 0
445 br i1 %tobool, label %if.end, label %if.then ; %a == 0 goes to %if.end, otherwise %if.then
448 ; CHECK-NEXT: load i32
449 ; CHECK-NEXT: store i32
452 %uu = load i32, i32* %x, align 4 ; first load of %x
453 store i32 %uu, i32* %r, align 4
457 ; CHECK-NEXT: call void @f()
458 ; CHECK-NEXT: load i32
462 %vv = load i32, i32* %x, align 4 ; second load of %x
466 ; Same as test13, but now the blocking function is not immediately in load's
469 define i32 @test14(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {
; %x is loaded once as %uu (and copied to %r) and again as %vv. The directives
; require a load i32 to remain directly after the call to @f in the output.
471 ; CHECK-LABEL: @test14(
473 ; CHECK-NEXT: icmp eq
477 %tobool = icmp eq i32 %a, 0
478 br i1 %tobool, label %if.end, label %if.then ; %a == 0 goes to %if.end, otherwise %if.then
481 ; CHECK-NEXT: load i32
482 ; CHECK-NEXT: store i32
485 %uu = load i32, i32* %x, align 4 ; first load of %x
486 store i32 %uu, i32* %r, align 4
490 ; CHECK-NEXT: call void @f()
491 ; CHECK-NEXT: load i32
501 %vv = load i32, i32* %x, align 4 ; second load of %x
505 ; Same as test13, but %x here is dereferenceable. A pointer that is
506 ; dereferenceable can be loaded from speculatively without a risk of trapping.
507 ; Since it is OK to speculate, PRE is allowed.
; Expected output, per the directives below: a new %vv.pre load of %x in
; entry.if.end_crit_edge, and %vv rewritten as a phi of %vv.pre and %uu that
; sits ahead of the call to @f.
509 define i32 @test15(i32* noalias nocapture readonly dereferenceable(8) %x, i32* noalias nocapture %r, i32 %a) {
; The label pattern ends with "(" like the other label directives in this
; file, so it can only match this function's own name.
511 ; CHECK-LABEL: @test15(
513 ; CHECK-NEXT: icmp eq
517 %tobool = icmp eq i32 %a, 0
518 br i1 %tobool, label %if.end, label %if.then ; %a == 0 goes to %if.end, otherwise %if.then
520 ; CHECK: entry.if.end_crit_edge:
521 ; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
522 ; CHECK-NEXT: br label %if.end
525 %uu = load i32, i32* %x, align 4 ; first load of %x
526 store i32 %uu, i32* %r, align 4
530 ; CHECK-NEXT: %uu = load i32, i32* %x, align 4
531 ; CHECK-NEXT: store i32 %uu, i32* %r, align 4
532 ; CHECK-NEXT: br label %if.end
536 %vv = load i32, i32* %x, align 4 ; second load of %x; expected to become a phi
540 ; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
541 ; CHECK-NEXT: call void @f()
542 ; CHECK-NEXT: ret i32 %vv
546 ; Same as test14, but %x here is dereferenceable. A pointer that is
547 ; dereferenceable can be loaded from speculatively without a risk of trapping.
548 ; Since it is OK to speculate, PRE is allowed.
550 define i32 @test16(i32* noalias nocapture readonly dereferenceable(8) %x, i32* noalias nocapture %r, i32 %a) {
552 ; CHECK-LABEL: @test16(
554 ; CHECK-NEXT: icmp eq
558 %tobool = icmp eq i32 %a, 0
559 br i1 %tobool, label %if.end, label %if.then
561 ; CHECK: entry.if.end_crit_edge:
562 ; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
563 ; CHECK-NEXT: br label %if.end
566 %uu = load i32, i32* %x, align 4
567 store i32 %uu, i32* %r, align 4
571 ; CHECK-NEXT: %uu = load i32, i32* %x, align 4
572 ; CHECK-NEXT: store i32 %uu, i32* %r, align 4
573 ; CHECK-NEXT: br label %if.end
580 ; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
581 ; CHECK-NEXT: call void @f()
582 ; CHECK-NEXT: ret i32 %vv
588 %vv = load i32, i32* %x, align 4