1 ; RUN: opt < %s -licm -S | FileCheck %s
2 ; RUN: opt < %s -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S | FileCheck %s
3 ; RUN: opt < %s -licm -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s
5 @X = global i32 0 ; <i32*> [#uses=1]
9 declare i32 @llvm.bitreverse.i32(i32)
11 ; This test case checks that LICM does not hoist potentially trapping
12 ; instructions when they are not guaranteed to execute.
13 define i32 @test1(i1 %c) {
14 ; CHECK-LABEL: @test1(
15 %A = load i32, i32* @X ; <i32> [#uses=2]
17 Loop: ; preds = %LoopTail, %0
19 br i1 %c, label %LoopTail, label %IfUnEqual
21 IfUnEqual: ; preds = %Loop
23 ; CHECK-NEXT: sdiv i32 4, %A
24 %B1 = sdiv i32 4, %A ; <i32> [#uses=1]
27 LoopTail: ; preds = %IfUnEqual, %Loop
28 %B = phi i32 [ 0, %Loop ], [ %B1, %IfUnEqual ] ; <i32> [#uses=1]
29 br i1 %c, label %Loop, label %Out
30 Out: ; preds = %LoopTail
31 %C = sub i32 %A, %B ; <i32> [#uses=1]
36 declare void @foo2(i32) nounwind
39 ;; It is ok and desirable to hoist this potentially trapping instruction.
40 define i32 @test2(i1 %c) {
41 ; CHECK-LABEL: @test2(
42 ; CHECK-NEXT: load i32, i32* @X
43 ; CHECK-NEXT: %B = sdiv i32 4, %A
44 %A = load i32, i32* @X
48 ;; Should have hoisted this div!
53 call void @foo2( i32 %B )
54 br i1 %c, label %Loop, label %Out
62 ; This loop invariant instruction should be constant folded, not hoisted.
63 define i32 @test3(i1 %c) {
64 ; CHECK-LABEL: define i32 @test3(
65 ; CHECK: call void @foo2(i32 6)
66 %A = load i32, i32* @X ; <i32> [#uses=2]
69 %B = add i32 4, 2 ; <i32> [#uses=2]
70 call void @foo2( i32 %B )
71 br i1 %c, label %Loop, label %Out
73 %C = sub i32 %A, %B ; <i32> [#uses=1]
77 ; CHECK-LABEL: @test4(
81 define i32 @test4(i32 %x, i32 %y) nounwind uwtable ssp {
85 for.body: ; preds = %entry, %for.body
86 %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
87 %n.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
88 call void @foo_may_call_exit(i32 0)
89 %div = sdiv i32 %x, %y
90 %add = add nsw i32 %n.01, %div
91 %inc = add nsw i32 %i.02, 1
92 %cmp = icmp slt i32 %inc, 10000
93 br i1 %cmp, label %for.body, label %for.end
95 for.end: ; preds = %for.body
96 %n.0.lcssa = phi i32 [ %add, %for.body ]
100 declare void @foo_may_call_exit(i32)
103 ; CHECK-LABEL: @test5(
104 ; CHECK: extractvalue
105 ; CHECK: br label %tailrecurse
106 ; CHECK: tailrecurse:
109 define { i32*, i32 } @test5(i32 %i, { i32*, i32 } %e) {
111 br label %tailrecurse
113 tailrecurse: ; preds = %then, %entry
114 %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ]
115 %out = extractvalue { i32*, i32 } %e, 1
116 %d = insertvalue { i32*, i32 } %e, i32* null, 0
117 %cmp1 = icmp sgt i32 %out, %i.tr
118 br i1 %cmp1, label %then, label %ifend
120 then: ; preds = %tailrecurse
122 %cmp2 = add i32 %i.tr, 1
123 br label %tailrecurse
125 ifend: ; preds = %tailrecurse
129 ; CHECK: define i32 @hoist_bitreverse(i32)
131 ; CHECK: br label %header
132 define i32 @hoist_bitreverse(i32) {
136 %sum = phi i32 [ 0, %1 ], [ %5, %latch ]
137 %2 = phi i32 [ 0, %1 ], [ %6, %latch ]
138 %3 = icmp slt i32 %2, 1024
139 br i1 %3, label %body, label %return
142 %4 = call i32 @llvm.bitreverse.i32(i32 %0)
143 %5 = add i32 %sum, %4
147 %6 = add nsw i32 %2, 1
154 ; A volatile load can be neither sunk nor hoisted.
155 define i32 @test_volatile(i1 %c) {
156 ; CHECK-LABEL: @test_volatile(
158 ; CHECK: load volatile i32, i32* @X
163 %A = load volatile i32, i32* @X
164 br i1 %c, label %Loop, label %Out
171 declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
172 declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind
173 declare void @escaping.invariant.start({}*) nounwind
174 ; invariant.start dominates the load, and in this scope, the
175 ; load is invariant. So, we can hoist the `addrld` load out of the loop.
176 define i32 @test_fence(i8* %addr, i32 %n, i8* %volatile) {
177 ; CHECK-LABEL: @test_fence
179 ; CHECK: invariant.start
180 ; CHECK: %addrld = load atomic i32, i32* %addr.i unordered, align 8
181 ; CHECK: br label %loop
183 %gep = getelementptr inbounds i8, i8* %addr, i64 8
184 %addr.i = bitcast i8* %gep to i32 *
185 store atomic i32 5, i32 * %addr.i unordered, align 8
187 %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
191 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
192 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
193 %volload = load atomic i8, i8* %volatile unordered, align 8
195 %volchk = icmp eq i8 %volload, 0
196 %addrld = load atomic i32, i32* %addr.i unordered, align 8
197 %sel = select i1 %volchk, i32 0, i32 %addrld
198 %sum.next = add i32 %sel, %sum
199 %indvar.next = add i32 %indvar, 1
200 %cond = icmp slt i32 %indvar.next, %n
201 br i1 %cond, label %loop, label %loopexit
209 ; Same as the test above, but the load is no longer invariant (because of the
210 ; invariant.end). We cannot hoist the `addrld` load out of the loop.
211 define i32 @test_fence1(i8* %addr, i32 %n, i8* %volatile) {
212 ; CHECK-LABEL: @test_fence1
214 ; CHECK: invariant.start
215 ; CHECK-NEXT: invariant.end
216 ; CHECK-NEXT: br label %loop
218 %gep = getelementptr inbounds i8, i8* %addr, i64 8
219 %addr.i = bitcast i8* %gep to i32 *
220 store atomic i32 5, i32 * %addr.i unordered, align 8
222 %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
223 call void @llvm.invariant.end.p0i8({}* %invst, i64 4, i8* %gep)
227 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
228 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
229 %volload = load atomic i8, i8* %volatile unordered, align 8
231 %volchk = icmp eq i8 %volload, 0
232 %addrld = load atomic i32, i32* %addr.i unordered, align 8
233 %sel = select i1 %volchk, i32 0, i32 %addrld
234 %sum.next = add i32 %sel, %sum
235 %indvar.next = add i32 %indvar, 1
236 %cond = icmp slt i32 %indvar.next, %n
237 br i1 %cond, label %loop, label %loopexit
243 ; Same as the test above, but instead of invariant.end, we have the result of
244 ; invariant.start escaping through a call. We cannot hoist the load.
245 define i32 @test_fence2(i8* %addr, i32 %n, i8* %volatile) {
246 ; CHECK-LABEL: @test_fence2
249 ; CHECK: br label %loop
251 %gep = getelementptr inbounds i8, i8* %addr, i64 8
252 %addr.i = bitcast i8* %gep to i32 *
253 store atomic i32 5, i32 * %addr.i unordered, align 8
255 %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
256 call void @escaping.invariant.start({}* %invst)
260 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
261 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
262 %volload = load atomic i8, i8* %volatile unordered, align 8
264 %volchk = icmp eq i8 %volload, 0
265 %addrld = load atomic i32, i32* %addr.i unordered, align 8
266 %sel = select i1 %volchk, i32 0, i32 %addrld
267 %sum.next = add i32 %sel, %sum
268 %indvar.next = add i32 %indvar, 1
269 %cond = icmp slt i32 %indvar.next, %n
270 br i1 %cond, label %loop, label %loopexit
276 ; FIXME: invariant.start dominates the load, and in this scope, the
277 ; load is invariant. So, we can hoist the `addrld` load out of the loop.
278 ; Consider the load operand addr.i being bitcast before it is passed to
280 define i32 @test_fence3(i32* %addr, i32 %n, i8* %volatile) {
281 ; CHECK-LABEL: @test_fence3
283 ; CHECK: invariant.start
284 ; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
285 ; CHECK: br label %loop
287 %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
288 %gep = bitcast i32* %addr.i to i8 *
289 store atomic i32 5, i32 * %addr.i unordered, align 8
291 %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
295 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
296 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
297 %volload = load atomic i8, i8* %volatile unordered, align 8
299 %volchk = icmp eq i8 %volload, 0
300 %addrld = load atomic i32, i32* %addr.i unordered, align 8
301 %sel = select i1 %volchk, i32 0, i32 %addrld
302 %sum.next = add i32 %sel, %sum
303 %indvar.next = add i32 %indvar, 1
304 %cond = icmp slt i32 %indvar.next, %n
305 br i1 %cond, label %loop, label %loopexit
311 ; The store and the invariant.start are inside the loop here, so we should not hoist the `addrld` load out of the loop.
312 define i32 @test_fence4(i32* %addr, i32 %n, i8* %volatile) {
313 ; CHECK-LABEL: @test_fence4
315 ; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
316 ; CHECK: br label %loop
318 %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
319 %gep = bitcast i32* %addr.i to i8 *
323 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
324 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
325 store atomic i32 5, i32 * %addr.i unordered, align 8
327 %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
328 %volload = load atomic i8, i8* %volatile unordered, align 8
330 %volchk = icmp eq i8 %volload, 0
331 %addrld = load atomic i32, i32* %addr.i unordered, align 8
332 %sel = select i1 %volchk, i32 0, i32 %addrld
333 %sum.next = add i32 %sel, %sum
334 %indvar.next = add i32 %indvar, 1
335 %cond = icmp slt i32 %indvar.next, %n
336 br i1 %cond, label %loop, label %loopexit