1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -aa-pipeline=basic-aa -passes='loop-mssa(licm)' -S | FileCheck %s
4 @X = global i32 0 ; <ptr> [#uses=1]
8 declare i32 @llvm.bitreverse.i32(i32)
10 ; This testcase tests for a problem where LICM hoists
11 ; potentially trapping instructions when they are not guaranteed to execute.
12 define i32 @test1(i1 %c) {
13 ; CHECK-LABEL: @test1(
14 ; CHECK-NEXT: [[A:%.*]] = load i32, ptr @X, align 4
15 ; CHECK-NEXT: br label [[LOOP:%.*]]
17 ; CHECK-NEXT: call void @foo()
18 ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOPTAIL:%.*]], label [[IFUNEQUAL:%.*]]
20 ; CHECK-NEXT: [[B1:%.*]] = sdiv i32 4, [[A]]
21 ; CHECK-NEXT: br label [[LOOPTAIL]]
23 ; CHECK-NEXT: [[B:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[B1]], [[IFUNEQUAL]] ]
24 ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[OUT:%.*]]
26 ; CHECK-NEXT: [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[LOOPTAIL]] ]
27 ; CHECK-NEXT: [[C:%.*]] = sub i32 [[A]], [[B_LCSSA]]
28 ; CHECK-NEXT: ret i32 [[C]]
30 %A = load i32, ptr @X ; <i32> [#uses=2]
32 Loop: ; preds = %LoopTail, %0
34 br i1 %c, label %LoopTail, label %IfUnEqual
36 IfUnEqual: ; preds = %Loop
37 %B1 = sdiv i32 4, %A ; <i32> [#uses=1]
40 LoopTail: ; preds = %IfUnEqual, %Loop
41 %B = phi i32 [ 0, %Loop ], [ %B1, %IfUnEqual ] ; <i32> [#uses=1]
42 br i1 %c, label %Loop, label %Out
43 Out: ; preds = %LoopTail
44 %C = sub i32 %A, %B ; <i32> [#uses=1]
49 declare void @foo2(i32) nounwind
52 ;; It is ok and desirable to hoist this potentially trapping instruction.
53 define i32 @test2(i1 %c) {
54 ; CHECK-LABEL: @test2(
55 ; CHECK-NEXT: [[A:%.*]] = load i32, ptr @X, align 4
56 ; CHECK-NEXT: [[B:%.*]] = sdiv i32 4, [[A]]
57 ; CHECK-NEXT: br label [[LOOP:%.*]]
59 ; CHECK-NEXT: br label [[LOOP2:%.*]]
61 ; CHECK-NEXT: call void @foo2(i32 [[B]])
62 ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[OUT:%.*]]
64 ; CHECK-NEXT: [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[LOOP2]] ]
65 ; CHECK-NEXT: [[C:%.*]] = sub i32 [[A]], [[B_LCSSA]]
66 ; CHECK-NEXT: ret i32 [[C]]
72 ;; LICM should hoist this div!
77 call void @foo2( i32 %B )
78 br i1 %c, label %Loop, label %Out
86 ; Don't bother constant folding the add, just hoist it.
87 define i32 @test3(i1 %c) {
88 ; CHECK-LABEL: @test3(
89 ; CHECK-NEXT: [[A:%.*]] = load i32, ptr @X, align 4
90 ; CHECK-NEXT: [[B:%.*]] = add i32 4, 2
91 ; CHECK-NEXT: br label [[LOOP:%.*]]
93 ; CHECK-NEXT: call void @foo2(i32 [[B]])
94 ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[OUT:%.*]]
96 ; CHECK-NEXT: [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[LOOP]] ]
97 ; CHECK-NEXT: [[C:%.*]] = sub i32 [[A]], [[B_LCSSA]]
98 ; CHECK-NEXT: ret i32 [[C]]
100 %A = load i32, ptr @X ; <i32> [#uses=2]
103 %B = add i32 4, 2 ; <i32> [#uses=2]
104 call void @foo2( i32 %B )
105 br i1 %c, label %Loop, label %Out
107 %C = sub i32 %A, %B ; <i32> [#uses=1]
111 define i32 @test4(i32 %x, i32 %y) nounwind uwtable ssp {
112 ; CHECK-LABEL: @test4(
114 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
116 ; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
117 ; CHECK-NEXT: [[N_01:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
118 ; CHECK-NEXT: call void @foo_may_call_exit(i32 0)
119 ; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[X:%.*]], [[Y:%.*]]
120 ; CHECK-NEXT: [[ADD]] = add nsw i32 [[N_01]], [[DIV]]
121 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1
122 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 10000
123 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
125 ; CHECK-NEXT: [[N_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
126 ; CHECK-NEXT: ret i32 [[N_0_LCSSA]]
131 for.body: ; preds = %entry, %for.body
132 %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
133 %n.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
134 call void @foo_may_call_exit(i32 0)
135 %div = sdiv i32 %x, %y
136 %add = add nsw i32 %n.01, %div
137 %inc = add nsw i32 %i.02, 1
138 %cmp = icmp slt i32 %inc, 10000
139 br i1 %cmp, label %for.body, label %for.end
141 for.end: ; preds = %for.body
142 %n.0.lcssa = phi i32 [ %add, %for.body ]
146 declare void @foo_may_call_exit(i32)
149 define { ptr, i32 } @test5(i32 %i, { ptr, i32 } %e) {
150 ; CHECK-LABEL: @test5(
152 ; CHECK-NEXT: [[OUT:%.*]] = extractvalue { ptr, i32 } [[E:%.*]], 1
153 ; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
154 ; CHECK: tailrecurse:
155 ; CHECK-NEXT: [[I_TR:%.*]] = phi i32 [ [[I:%.*]], [[ENTRY:%.*]] ], [ [[CMP2:%.*]], [[THEN:%.*]] ]
156 ; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[OUT]], [[I_TR]]
157 ; CHECK-NEXT: br i1 [[CMP1]], label [[THEN]], label [[IFEND:%.*]]
159 ; CHECK-NEXT: call void @foo()
160 ; CHECK-NEXT: [[CMP2]] = add i32 [[I_TR]], 1
161 ; CHECK-NEXT: br label [[TAILRECURSE]]
163 ; CHECK-NEXT: [[D_LE:%.*]] = insertvalue { ptr, i32 } [[E]], ptr null, 0
164 ; CHECK-NEXT: ret { ptr, i32 } [[D_LE]]
167 br label %tailrecurse
169 tailrecurse: ; preds = %then, %entry
170 %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ]
171 %out = extractvalue { ptr, i32 } %e, 1
172 %d = insertvalue { ptr, i32 } %e, ptr null, 0
173 %cmp1 = icmp sgt i32 %out, %i.tr
174 br i1 %cmp1, label %then, label %ifend
176 then: ; preds = %tailrecurse
178 %cmp2 = add i32 %i.tr, 1
179 br label %tailrecurse
181 ifend: ; preds = %tailrecurse
185 define void @test6(float %f) #2 {
186 ; CHECK-LABEL: @test6(
188 ; CHECK-NEXT: [[NEG:%.*]] = fneg float [[F:%.*]]
189 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
191 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
192 ; CHECK-NEXT: call void @foo_may_call_exit(i32 0)
193 ; CHECK-NEXT: call void @use(float [[NEG]])
194 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1
195 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 10000
196 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
198 ; CHECK-NEXT: ret void
203 for.body: ; preds = %for.body, %entry
204 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
205 call void @foo_may_call_exit(i32 0)
207 call void @use(float %neg)
208 %inc = add nsw i32 %i, 1
209 %cmp = icmp slt i32 %inc, 10000
210 br i1 %cmp, label %for.body, label %for.end
212 for.end: ; preds = %for.body
216 declare void @use(float)
218 define i32 @hoist_bitreverse(i32 %0) {
219 ; CHECK-LABEL: @hoist_bitreverse(
220 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP0:%.*]])
221 ; CHECK-NEXT: br label [[HEADER:%.*]]
223 ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[TMP1:%.*]] ], [ [[TMP5:%.*]], [[LATCH:%.*]] ]
224 ; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 0, [[TMP1]] ], [ [[TMP6:%.*]], [[LATCH]] ]
225 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1024
226 ; CHECK-NEXT: br i1 [[TMP4]], label [[BODY:%.*]], label [[RETURN:%.*]]
228 ; CHECK-NEXT: [[TMP5]] = add i32 [[SUM]], [[TMP2]]
229 ; CHECK-NEXT: br label [[LATCH]]
231 ; CHECK-NEXT: [[TMP6]] = add nsw i32 [[TMP3]], 1
232 ; CHECK-NEXT: br label [[HEADER]]
234 ; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[HEADER]] ]
235 ; CHECK-NEXT: ret i32 [[SUM_LCSSA]]
240 %sum = phi i32 [ 0, %1 ], [ %5, %latch ]
241 %2 = phi i32 [ 0, %1 ], [ %6, %latch ]
242 %3 = icmp slt i32 %2, 1024
243 br i1 %3, label %body, label %return
246 %4 = call i32 @llvm.bitreverse.i32(i32 %0)
247 %5 = add i32 %sum, %4
251 %6 = add nsw i32 %2, 1
258 ; A volatile load can be neither sunk nor hoisted.
259 define i32 @test_volatile(i1 %c) {
260 ; CHECK-LABEL: @test_volatile(
261 ; CHECK-NEXT: br label [[LOOP:%.*]]
263 ; CHECK-NEXT: [[A:%.*]] = load volatile i32, ptr @X, align 4
264 ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[OUT:%.*]]
266 ; CHECK-NEXT: [[A_LCSSA:%.*]] = phi i32 [ [[A]], [[LOOP]] ]
267 ; CHECK-NEXT: ret i32 [[A_LCSSA]]
272 %A = load volatile i32, ptr @X
273 br i1 %c, label %Loop, label %Out
280 declare ptr @llvm.invariant.start.p0(i64, ptr nocapture) nounwind readonly
281 declare void @llvm.invariant.end.p0(ptr, i64, ptr nocapture) nounwind
282 declare void @escaping.invariant.start(ptr) nounwind
283 ; invariant.start dominates the load, and in this scope, the
284 ; load is invariant. So, we can hoist the `addrld` load out of the loop.
285 define i32 @test_fence(ptr %addr, i32 %n, ptr %volatile) {
286 ; CHECK-LABEL: @test_fence(
288 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ADDR:%.*]], i64 8
289 ; CHECK-NEXT: store atomic i32 5, ptr [[GEP]] unordered, align 8
290 ; CHECK-NEXT: fence release
291 ; CHECK-NEXT: [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[GEP]])
292 ; CHECK-NEXT: [[ADDRLD:%.*]] = load atomic i32, ptr [[GEP]] unordered, align 8
293 ; CHECK-NEXT: br label [[LOOP:%.*]]
295 ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
296 ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
297 ; CHECK-NEXT: [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
298 ; CHECK-NEXT: fence acquire
299 ; CHECK-NEXT: [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
300 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
301 ; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
302 ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
303 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
304 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
306 ; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
307 ; CHECK-NEXT: ret i32 [[SUM_LCSSA]]
310 %gep = getelementptr inbounds i8, ptr %addr, i64 8
311 store atomic i32 5, ptr %gep unordered, align 8
313 %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %gep)
317 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
318 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
319 %volload = load atomic i8, ptr %volatile unordered, align 8
321 %volchk = icmp eq i8 %volload, 0
322 %addrld = load atomic i32, ptr %gep unordered, align 8
323 %sel = select i1 %volchk, i32 0, i32 %addrld
324 %sum.next = add i32 %sel, %sum
325 %indvar.next = add i32 %indvar, 1
326 %cond = icmp slt i32 %indvar.next, %n
327 br i1 %cond, label %loop, label %loopexit
335 ; Same as test above, but the load is no longer invariant (presence of
336 ; invariant.end). We cannot hoist the addrld out of the loop.
337 define i32 @test_fence1(ptr %addr, i32 %n, ptr %volatile) {
338 ; CHECK-LABEL: @test_fence1(
340 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ADDR:%.*]], i64 8
341 ; CHECK-NEXT: store atomic i32 5, ptr [[GEP]] unordered, align 8
342 ; CHECK-NEXT: fence release
343 ; CHECK-NEXT: [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[GEP]])
344 ; CHECK-NEXT: call void @llvm.invariant.end.p0(ptr [[INVST]], i64 4, ptr [[GEP]])
345 ; CHECK-NEXT: br label [[LOOP:%.*]]
347 ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
348 ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
349 ; CHECK-NEXT: [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
350 ; CHECK-NEXT: fence acquire
351 ; CHECK-NEXT: [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
352 ; CHECK-NEXT: [[ADDRLD:%.*]] = load atomic i32, ptr [[GEP]] unordered, align 8
353 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
354 ; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
355 ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
356 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
357 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
359 ; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
360 ; CHECK-NEXT: ret i32 [[SUM_LCSSA]]
363 %gep = getelementptr inbounds i8, ptr %addr, i64 8
364 store atomic i32 5, ptr %gep unordered, align 8
366 %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %gep)
367 call void @llvm.invariant.end.p0(ptr %invst, i64 4, ptr %gep)
371 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
372 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
373 %volload = load atomic i8, ptr %volatile unordered, align 8
375 %volchk = icmp eq i8 %volload, 0
376 %addrld = load atomic i32, ptr %gep unordered, align 8
377 %sel = select i1 %volchk, i32 0, i32 %addrld
378 %sum.next = add i32 %sel, %sum
379 %indvar.next = add i32 %indvar, 1
380 %cond = icmp slt i32 %indvar.next, %n
381 br i1 %cond, label %loop, label %loopexit
387 ; Same as the test above, but instead of invariant.end, we have the result of
388 ; invariant.start escaping through a call. We cannot hoist the load.
389 define i32 @test_fence2(ptr %addr, i32 %n, ptr %volatile) {
390 ; CHECK-LABEL: @test_fence2(
392 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ADDR:%.*]], i64 8
393 ; CHECK-NEXT: store atomic i32 5, ptr [[GEP]] unordered, align 8
394 ; CHECK-NEXT: fence release
395 ; CHECK-NEXT: [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[GEP]])
396 ; CHECK-NEXT: call void @escaping.invariant.start(ptr [[INVST]])
397 ; CHECK-NEXT: br label [[LOOP:%.*]]
399 ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
400 ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
401 ; CHECK-NEXT: [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
402 ; CHECK-NEXT: fence acquire
403 ; CHECK-NEXT: [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
404 ; CHECK-NEXT: [[ADDRLD:%.*]] = load atomic i32, ptr [[GEP]] unordered, align 8
405 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
406 ; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
407 ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
408 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
409 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
411 ; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
412 ; CHECK-NEXT: ret i32 [[SUM_LCSSA]]
415 %gep = getelementptr inbounds i8, ptr %addr, i64 8
416 store atomic i32 5, ptr %gep unordered, align 8
418 %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %gep)
419 call void @escaping.invariant.start(ptr %invst)
423 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
424 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
425 %volload = load atomic i8, ptr %volatile unordered, align 8
427 %volchk = icmp eq i8 %volload, 0
428 %addrld = load atomic i32, ptr %gep unordered, align 8
429 %sel = select i1 %volchk, i32 0, i32 %addrld
430 %sum.next = add i32 %sel, %sum
431 %indvar.next = add i32 %indvar, 1
432 %cond = icmp slt i32 %indvar.next, %n
433 br i1 %cond, label %loop, label %loopexit
439 ; Consider the load operand addr.i bitcasted before being passed to
441 define i32 @test_fence3(ptr %addr, i32 %n, ptr %volatile) {
442 ; CHECK-LABEL: @test_fence3(
444 ; CHECK-NEXT: [[ADDR_I:%.*]] = getelementptr inbounds i32, ptr [[ADDR:%.*]], i64 8
445 ; CHECK-NEXT: store atomic i32 5, ptr [[ADDR_I]] unordered, align 8
446 ; CHECK-NEXT: fence release
447 ; CHECK-NEXT: [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[ADDR_I]])
448 ; CHECK-NEXT: [[ADDRLD:%.*]] = load atomic i32, ptr [[ADDR_I]] unordered, align 8
449 ; CHECK-NEXT: br label [[LOOP:%.*]]
451 ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
452 ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
453 ; CHECK-NEXT: [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
454 ; CHECK-NEXT: fence acquire
455 ; CHECK-NEXT: [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
456 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
457 ; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
458 ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
459 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
460 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
462 ; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
463 ; CHECK-NEXT: ret i32 [[SUM_LCSSA]]
466 %addr.i = getelementptr inbounds i32, ptr %addr, i64 8
467 store atomic i32 5, ptr %addr.i unordered, align 8
469 %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %addr.i)
473 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
474 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
475 %volload = load atomic i8, ptr %volatile unordered, align 8
477 %volchk = icmp eq i8 %volload, 0
478 %addrld = load atomic i32, ptr %addr.i unordered, align 8
479 %sel = select i1 %volchk, i32 0, i32 %addrld
480 %sum.next = add i32 %sel, %sum
481 %indvar.next = add i32 %indvar, 1
482 %cond = icmp slt i32 %indvar.next, %n
483 br i1 %cond, label %loop, label %loopexit
489 ; We should not hoist the addrld out of the loop.
490 define i32 @test_fence4(ptr %addr, i32 %n, ptr %volatile) {
491 ; CHECK-LABEL: @test_fence4(
493 ; CHECK-NEXT: [[ADDR_I:%.*]] = getelementptr inbounds i32, ptr [[ADDR:%.*]], i64 8
494 ; CHECK-NEXT: br label [[LOOP:%.*]]
496 ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
497 ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
498 ; CHECK-NEXT: store atomic i32 5, ptr [[ADDR_I]] unordered, align 8
499 ; CHECK-NEXT: fence release
500 ; CHECK-NEXT: [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[ADDR_I]])
501 ; CHECK-NEXT: [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
502 ; CHECK-NEXT: fence acquire
503 ; CHECK-NEXT: [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
504 ; CHECK-NEXT: [[ADDRLD:%.*]] = load atomic i32, ptr [[ADDR_I]] unordered, align 8
505 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
506 ; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
507 ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
508 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
509 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
511 ; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
512 ; CHECK-NEXT: ret i32 [[SUM_LCSSA]]
515 %addr.i = getelementptr inbounds i32, ptr %addr, i64 8
519 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
520 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
521 store atomic i32 5, ptr %addr.i unordered, align 8
523 %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %addr.i)
524 %volload = load atomic i8, ptr %volatile unordered, align 8
526 %volchk = icmp eq i8 %volload, 0
527 %addrld = load atomic i32, ptr %addr.i unordered, align 8
528 %sel = select i1 %volchk, i32 0, i32 %addrld
529 %sum.next = add i32 %sel, %sum
530 %indvar.next = add i32 %indvar, 1
531 %cond = icmp slt i32 %indvar.next, %n
532 br i1 %cond, label %loop, label %loopexit
538 ; We can't hoist the invariant load out of the loop because
539 ; the marker is given a variable size (-1).
540 define i32 @test_fence5(ptr %addr, i32 %n, ptr %volatile) {
541 ; CHECK-LABEL: @test_fence5(
543 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ADDR:%.*]], i64 8
544 ; CHECK-NEXT: store atomic i32 5, ptr [[GEP]] unordered, align 8
545 ; CHECK-NEXT: fence release
546 ; CHECK-NEXT: [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 -1, ptr [[GEP]])
547 ; CHECK-NEXT: br label [[LOOP:%.*]]
549 ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
550 ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
551 ; CHECK-NEXT: [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
552 ; CHECK-NEXT: fence acquire
553 ; CHECK-NEXT: [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
554 ; CHECK-NEXT: [[ADDRLD:%.*]] = load atomic i32, ptr [[GEP]] unordered, align 8
555 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
556 ; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
557 ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
558 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
559 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
561 ; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
562 ; CHECK-NEXT: ret i32 [[SUM_LCSSA]]
565 %gep = getelementptr inbounds i8, ptr %addr, i64 8
566 store atomic i32 5, ptr %gep unordered, align 8
568 %invst = call ptr @llvm.invariant.start.p0(i64 -1, ptr %gep)
572 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
573 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
574 %volload = load atomic i8, ptr %volatile unordered, align 8
576 %volchk = icmp eq i8 %volload, 0
577 %addrld = load atomic i32, ptr %gep unordered, align 8
578 %sel = select i1 %volchk, i32 0, i32 %addrld
579 %sum.next = add i32 %sel, %sum
580 %indvar.next = add i32 %indvar, 1
581 %cond = icmp slt i32 %indvar.next, %n
582 br i1 %cond, label %loop, label %loopexit
590 @a = external global i8
592 ; FIXME: Support hoisting invariant loads of globals.
593 define void @test_fence6() {
594 ; CHECK-LABEL: @test_fence6(
596 ; CHECK-NEXT: [[I:%.*]] = call ptr @llvm.invariant.start.p0(i64 1, ptr @a)
597 ; CHECK-NEXT: br label [[F:%.*]]
599 ; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr @a, align 1
600 ; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], 0
601 ; CHECK-NEXT: [[T:%.*]] = icmp eq i8 [[TMP1]], 0
602 ; CHECK-NEXT: tail call void @g(i1 [[T]])
603 ; CHECK-NEXT: br label [[F]]
606 %i = call ptr @llvm.invariant.start.p0(i64 1, ptr @a)
612 %t = icmp eq i8 %1, 0
613 tail call void @g(i1 %t)