1 ; RUN: opt < %s -licm -S | FileCheck %s
2 ; RUN: opt < %s -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S | FileCheck %s
3 ; RUN: opt < %s -licm -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s
5 @X = global i32 0 ; <i32*> [#uses=1]
9 declare i32 @llvm.bitreverse.i32(i32)
11 ; This testcase tests for a problem where LICM hoists
12 ; potentially trapping instructions when they are not guaranteed to execute.
13 define i32 @test1(i1 %c) {
14 ; CHECK-LABEL: @test1(
15 %A = load i32, i32* @X ; <i32> [#uses=2]
17 Loop: ; preds = %LoopTail, %0
19 br i1 %c, label %LoopTail, label %IfUnEqual
21 IfUnEqual: ; preds = %Loop
23 ; CHECK-NEXT: sdiv i32 4, %A
24 %B1 = sdiv i32 4, %A ; <i32> [#uses=1]
27 LoopTail: ; preds = %IfUnEqual, %Loop
28 %B = phi i32 [ 0, %Loop ], [ %B1, %IfUnEqual ] ; <i32> [#uses=1]
29 br i1 %c, label %Loop, label %Out
30 Out: ; preds = %LoopTail
31 %C = sub i32 %A, %B ; <i32> [#uses=1]
36 declare void @foo2(i32) nounwind
39 ;; It is ok and desirable to hoist this potentially trapping instruction.
40 define i32 @test2(i1 %c) {
41 ; CHECK-LABEL: @test2(
42 ; CHECK-NEXT: load i32, i32* @X
43 ; CHECK-NEXT: %B = sdiv i32 4, %A
44 %A = load i32, i32* @X
48 ;; Should have hoisted this div!
53 call void @foo2( i32 %B )
54 br i1 %c, label %Loop, label %Out
62 ; This loop invariant instruction should be constant folded, not hoisted.
63 define i32 @test3(i1 %c) {
64 ; CHECK-LABEL: define i32 @test3(
65 ; CHECK: call void @foo2(i32 6)
66 %A = load i32, i32* @X ; <i32> [#uses=2]
69 %B = add i32 4, 2 ; <i32> [#uses=2]
70 call void @foo2( i32 %B )
71 br i1 %c, label %Loop, label %Out
73 %C = sub i32 %A, %B ; <i32> [#uses=1]
77 ; CHECK-LABEL: @test4(
81 define i32 @test4(i32 %x, i32 %y) nounwind uwtable ssp {
85 for.body: ; preds = %entry, %for.body
86 %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
87 %n.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
88 call void @foo_may_call_exit(i32 0)
89 %div = sdiv i32 %x, %y
90 %add = add nsw i32 %n.01, %div
91 %inc = add nsw i32 %i.02, 1
92 %cmp = icmp slt i32 %inc, 10000
93 br i1 %cmp, label %for.body, label %for.end
95 for.end: ; preds = %for.body
96 %n.0.lcssa = phi i32 [ %add, %for.body ]
100 declare void @foo_may_call_exit(i32)
103 ; CHECK-LABEL: @test5(
104 ; CHECK: extractvalue
105 ; CHECK: br label %tailrecurse
106 ; CHECK: tailrecurse:
109 define { i32*, i32 } @test5(i32 %i, { i32*, i32 } %e) {
111 br label %tailrecurse
113 tailrecurse: ; preds = %then, %entry
114 %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ]
115 %out = extractvalue { i32*, i32 } %e, 1
116 %d = insertvalue { i32*, i32 } %e, i32* null, 0
117 %cmp1 = icmp sgt i32 %out, %i.tr
118 br i1 %cmp1, label %then, label %ifend
120 then: ; preds = %tailrecurse
122 %cmp2 = add i32 %i.tr, 1
123 br label %tailrecurse
125 ifend: ; preds = %tailrecurse
129 ; CHECK: define void @test6(float %f)
131 ; CHECK: br label %for.body
132 define void @test6(float %f) #2 {
136 for.body: ; preds = %for.body, %entry
137 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
138 call void @foo_may_call_exit(i32 0)
140 call void @use(float %neg)
141 %inc = add nsw i32 %i, 1
142 %cmp = icmp slt i32 %inc, 10000
143 br i1 %cmp, label %for.body, label %for.end
145 for.end: ; preds = %for.body
149 declare void @use(float)
151 ; CHECK: define i32 @hoist_bitreverse(i32 %0)
153 ; CHECK: br label %header
154 define i32 @hoist_bitreverse(i32 %0) {
158 %sum = phi i32 [ 0, %1 ], [ %5, %latch ]
159 %2 = phi i32 [ 0, %1 ], [ %6, %latch ]
160 %3 = icmp slt i32 %2, 1024
161 br i1 %3, label %body, label %return
164 %4 = call i32 @llvm.bitreverse.i32(i32 %0)
165 %5 = add i32 %sum, %4
169 %6 = add nsw i32 %2, 1
176 ; Can neither sink nor hoist
177 define i32 @test_volatile(i1 %c) {
178 ; CHECK-LABEL: @test_volatile(
180 ; CHECK: load volatile i32, i32* @X
185 %A = load volatile i32, i32* @X
186 br i1 %c, label %Loop, label %Out
193 declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
194 declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind
195 declare void @escaping.invariant.start({}*) nounwind
196 ; invariant.start dominates the load, and in this scope, the
197 ; load is invariant. So, we can hoist the `addrld` load out of the loop.
198 define i32 @test_fence(i8* %addr, i32 %n, i8* %volatile) {
199 ; CHECK-LABEL: @test_fence
201 ; CHECK: invariant.start
202 ; CHECK: %addrld = load atomic i32, i32* %addr.i unordered, align 8
203 ; CHECK: br label %loop
205 %gep = getelementptr inbounds i8, i8* %addr, i64 8
206 %addr.i = bitcast i8* %gep to i32 *
207 store atomic i32 5, i32 * %addr.i unordered, align 8
209 %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
213 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
214 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
215 %volload = load atomic i8, i8* %volatile unordered, align 8
217 %volchk = icmp eq i8 %volload, 0
218 %addrld = load atomic i32, i32* %addr.i unordered, align 8
219 %sel = select i1 %volchk, i32 0, i32 %addrld
220 %sum.next = add i32 %sel, %sum
221 %indvar.next = add i32 %indvar, 1
222 %cond = icmp slt i32 %indvar.next, %n
223 br i1 %cond, label %loop, label %loopexit
231 ; Same as test above, but the load is no longer invariant (presence of
232 ; invariant.end). We cannot hoist the addrld out of loop.
233 define i32 @test_fence1(i8* %addr, i32 %n, i8* %volatile) {
234 ; CHECK-LABEL: @test_fence1
236 ; CHECK: invariant.start
237 ; CHECK-NEXT: invariant.end
238 ; CHECK-NEXT: br label %loop
240 %gep = getelementptr inbounds i8, i8* %addr, i64 8
241 %addr.i = bitcast i8* %gep to i32 *
242 store atomic i32 5, i32 * %addr.i unordered, align 8
244 %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
245 call void @llvm.invariant.end.p0i8({}* %invst, i64 4, i8* %gep)
249 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
250 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
251 %volload = load atomic i8, i8* %volatile unordered, align 8
253 %volchk = icmp eq i8 %volload, 0
254 %addrld = load atomic i32, i32* %addr.i unordered, align 8
255 %sel = select i1 %volchk, i32 0, i32 %addrld
256 %sum.next = add i32 %sel, %sum
257 %indvar.next = add i32 %indvar, 1
258 %cond = icmp slt i32 %indvar.next, %n
259 br i1 %cond, label %loop, label %loopexit
265 ; same as test above, but instead of invariant.end, we have the result of
266 ; invariant.start escaping through a call. We cannot hoist the load.
267 define i32 @test_fence2(i8* %addr, i32 %n, i8* %volatile) {
268 ; CHECK-LABEL: @test_fence2
271 ; CHECK: br label %loop
273 %gep = getelementptr inbounds i8, i8* %addr, i64 8
274 %addr.i = bitcast i8* %gep to i32 *
275 store atomic i32 5, i32 * %addr.i unordered, align 8
277 %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
278 call void @escaping.invariant.start({}* %invst)
282 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
283 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
284 %volload = load atomic i8, i8* %volatile unordered, align 8
286 %volchk = icmp eq i8 %volload, 0
287 %addrld = load atomic i32, i32* %addr.i unordered, align 8
288 %sel = select i1 %volchk, i32 0, i32 %addrld
289 %sum.next = add i32 %sel, %sum
290 %indvar.next = add i32 %indvar, 1
291 %cond = icmp slt i32 %indvar.next, %n
292 br i1 %cond, label %loop, label %loopexit
298 ; FIXME: invariant.start dominates the load, and in this scope, the
299 ; load is invariant. So, we can hoist the `addrld` load out of the loop.
300 ; Consider the loadoperand addr.i bitcasted before being passed to
302 define i32 @test_fence3(i32* %addr, i32 %n, i8* %volatile) {
303 ; CHECK-LABEL: @test_fence3
305 ; CHECK: invariant.start
306 ; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
307 ; CHECK: br label %loop
309 %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
310 %gep = bitcast i32* %addr.i to i8 *
311 store atomic i32 5, i32 * %addr.i unordered, align 8
313 %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
317 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
318 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
319 %volload = load atomic i8, i8* %volatile unordered, align 8
321 %volchk = icmp eq i8 %volload, 0
322 %addrld = load atomic i32, i32* %addr.i unordered, align 8
323 %sel = select i1 %volchk, i32 0, i32 %addrld
324 %sum.next = add i32 %sel, %sum
325 %indvar.next = add i32 %indvar, 1
326 %cond = icmp slt i32 %indvar.next, %n
327 br i1 %cond, label %loop, label %loopexit
333 ; We should not hoist the addrld out of the loop.
334 define i32 @test_fence4(i32* %addr, i32 %n, i8* %volatile) {
335 ; CHECK-LABEL: @test_fence4
337 ; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
338 ; CHECK: br label %loop
340 %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
341 %gep = bitcast i32* %addr.i to i8 *
345 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
346 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
347 store atomic i32 5, i32 * %addr.i unordered, align 8
349 %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
350 %volload = load atomic i8, i8* %volatile unordered, align 8
352 %volchk = icmp eq i8 %volload, 0
353 %addrld = load atomic i32, i32* %addr.i unordered, align 8
354 %sel = select i1 %volchk, i32 0, i32 %addrld
355 %sum.next = add i32 %sel, %sum
356 %indvar.next = add i32 %indvar, 1
357 %cond = icmp slt i32 %indvar.next, %n
358 br i1 %cond, label %loop, label %loopexit