; RUN: opt < %s -S -early-cse | FileCheck %s
; RUN: opt < %s -S -passes=early-cse | FileCheck %s

declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind

; Check that we do load-load forwarding over invariant.start, since it does not
; clobber memory.
define i8 @test_bypass1(i8 *%P) {
; CHECK-LABEL: @test_bypass1(
; CHECK-NEXT: %V1 = load i8, i8* %P
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
; CHECK-NEXT: ret i8 0

  %V1 = load i8, i8* %P
  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
  %V2 = load i8, i8* %P
  %Diff = sub i8 %V1, %V2
  ret i8 %Diff
}

; Trivial store->load forwarding over invariant.start
define i8 @test_bypass2(i8 *%P) {
; CHECK-LABEL: @test_bypass2(
; CHECK-NEXT: store i8 42, i8* %P
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
; CHECK-NEXT: ret i8 42

  store i8 42, i8* %P
  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
  %V1 = load i8, i8* %P
  ret i8 %V1
}

; We can DSE over invariant.start calls, since the first store to
; %P is valid, and the second store is actually unreachable based on semantics
; of invariant.start.
define void @test_bypass3(i8* %P) {
; CHECK-LABEL: @test_bypass3(
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
; CHECK-NEXT: store i8 60, i8* %P

  store i8 50, i8* %P
  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
  store i8 60, i8* %P
  ret void
}

; FIXME: Now the first store can actually be eliminated, since there is no read
; within the invariant region, between start and end.
define void @test_bypass4(i8* %P) {
; CHECK-LABEL: @test_bypass4(
; CHECK-NEXT: store i8 50, i8* %P
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
; CHECK-NEXT: call void @llvm.invariant.end.p0i8({}* %i, i64 1, i8* %P)
; CHECK-NEXT: store i8 60, i8* %P

  store i8 50, i8* %P
  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
  call void @llvm.invariant.end.p0i8({}* %i, i64 1, i8* %P)
  store i8 60, i8* %P
  ret void
}

declare void @clobber()
declare {}* @llvm.invariant.start.p0i32(i64 %size, i32* nocapture %ptr)
declare void @llvm.invariant.end.p0i32({}*, i64, i32* nocapture) nounwind

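; Both loads are covered by the invariant.start scope, so the second load can
; be forwarded from the first even across a clobbering call.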
define i32 @test_before_load(i32* %p) {
; CHECK-LABEL: @test_before_load
; CHECK: ret i32 0
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

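; A load issued just before invariant.start reads the value the scope then
; keeps invariant, so it can still be forwarded past the clobber.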
define i32 @test_before_clobber(i32* %p) {
; CHECK-LABEL: @test_before_clobber
; CHECK: ret i32 0
  %v1 = load i32, i32* %p
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

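; A second invariant.start on the same location does not disturb the scope
; that is already open.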
define i32 @test_duplicate_scope(i32* %p) {
; CHECK-LABEL: @test_duplicate_scope
; CHECK: ret i32 0
  %v1 = load i32, i32* %p
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

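; Even when the loaded value itself is unknown (a clobber precedes the first
; load), the second load can be replaced with the first.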
define i32 @test_unanalyzable_load(i32* %p) {
; CHECK-LABEL: @test_unanalyzable_load
; CHECK: ret i32 0
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

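; Negative test: the clobber occurs before invariant.start, so the two loads
; may see different values.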
define i32 @test_negative_after_clobber(i32* %p) {
; CHECK-LABEL: @test_negative_after_clobber
; CHECK: ret i32 %sub
  %v1 = load i32, i32* %p
  call void @clobber()
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

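; The scope opened before the branch covers both paths into %merge, so the
; load in the merge block can be forwarded despite the clobber on one path.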
define i32 @test_merge(i32* %p, i1 %cnd) {
; CHECK-LABEL: @test_merge
; CHECK: ret i32 0
  %v1 = load i32, i32* %p
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  br i1 %cnd, label %merge, label %taken
taken:
  call void @clobber()
  br label %merge
merge:
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

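; Negative test: invariant.start only begins in the merge block, after the
; clobber on the taken path, so the loads must not be combined.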
define i32 @test_negative_after_mergeclobber(i32* %p, i1 %cnd) {
; CHECK-LABEL: @test_negative_after_mergeclobber
; CHECK: ret i32 %sub
  %v1 = load i32, i32* %p
  br i1 %cnd, label %merge, label %taken
taken:
  call void @clobber()
  br label %merge
merge:
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; In theory, this version could work, but EarlyCSE is incapable of
; merging facts along distinct paths.
define i32 @test_false_negative_merge(i32* %p, i1 %cnd) {
; CHECK-LABEL: @test_false_negative_merge
; CHECK: ret i32 %sub
  %v1 = load i32, i32* %p
  br i1 %cnd, label %merge, label %taken
taken:
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  br label %merge
merge:
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

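; Like the unanalyzable-load case above, but the second load sits in a merge
; block reached through a path containing a clobber; forwarding is still legal.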
define i32 @test_merge_unanalyzable_load(i32* %p, i1 %cnd) {
; CHECK-LABEL: @test_merge_unanalyzable_load
; CHECK: ret i32 0
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  %v1 = load i32, i32* %p
  br i1 %cnd, label %merge, label %taken
taken:
  call void @clobber()
  br label %merge
merge:
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

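; Storing back the value just loaded from an invariant location is a no-op,
; so the store can be deleted even though a clobber intervenes.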
define void @test_dse_before_load(i32* %p, i1 %cnd) {
; CHECK-LABEL: @test_dse_before_load
; CHECK-NOT: store
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  store i32 %v1, i32* %p
  ret void
}

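; As above, but with the load issued just before invariant.start; the store
; of the same value is still dead.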
define void @test_dse_after_load(i32* %p, i1 %cnd) {
; CHECK-LABEL: @test_dse_after_load
; CHECK-NOT: store
  %v1 = load i32, i32* %p
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  store i32 %v1, i32* %p
  ret void
}

; In this case, we have a false negative since MemoryLocation is implicitly
; typed due to the use of a Value to represent the address. Note that other
; passes will canonicalize away the bitcasts in this example.
define i32 @test_false_negative_types(i32* %p) {
; CHECK-LABEL: @test_false_negative_types
; CHECK: ret i32 %sub
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %pf = bitcast i32* %p to float*
  %v2f = load float, float* %pf
  %v2 = bitcast float %v2f to i32
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

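; Negative test: the 3-byte invariant region does not cover the full 4-byte
; load.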
define i32 @test_negative_size1(i32* %p) {
; CHECK-LABEL: @test_negative_size1
; CHECK: ret i32 %sub
  call {}* @llvm.invariant.start.p0i32(i64 3, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

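; Negative test: a zero-sized invariant region covers nothing.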
define i32 @test_negative_size2(i32* %p) {
; CHECK-LABEL: @test_negative_size2
; CHECK: ret i32 %sub
  call {}* @llvm.invariant.start.p0i32(i64 0, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

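; Negative test: the scope is ended by invariant.end before either load, so
; the clobber may change the value in between.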
define i32 @test_negative_scope(i32* %p) {
; CHECK-LABEL: @test_negative_scope
; CHECK: ret i32 %sub
  %scope = call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @llvm.invariant.end.p0i32({}* %scope, i64 4, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

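; The scope is not ended until after both loads, so forwarding would be legal
; here, but EarlyCSE does not currently take advantage of scopes that have a
; matching invariant.end.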
define i32 @test_false_negative_scope(i32* %p) {
; CHECK-LABEL: @test_false_negative_scope
; CHECK: ret i32 %sub
  %scope = call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  call void @llvm.invariant.end.p0i32({}* %scope, i64 4, i32* %p)
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; An invariant load de facto starts an invariant.start scope of the appropriate size.
define i32 @test_invariant_load_scope(i32* %p) {
; CHECK-LABEL: @test_invariant_load_scope
; CHECK: ret i32 0
  %v1 = load i32, i32* %p, !invariant.load !{}
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}