; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=instcombine -S < %s | FileCheck %s

@gp = global ptr null, align 8

declare noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0)

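; The malloc result has not escaped at the point of the compare, so it cannot
; be equal to the pointer loaded from @gp; the compare folds to false and the
; dead malloc is removed.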
define i1 @compare_global_trivialeq() {
; CHECK-LABEL: @compare_global_trivialeq(
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 4)
  %lgp = load ptr, ptr @gp, align 8
  %cmp = icmp eq ptr %m, %lgp
  ret i1 %cmp
}

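; Same as above, but with an ne compare, which folds to true.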
define i1 @compare_global_trivialne() {
; CHECK-LABEL: @compare_global_trivialne(
; CHECK-NEXT:    ret i1 true
;
  %m = call ptr @malloc(i64 4)
  %lgp = load ptr, ptr @gp, align 8
  %cmp = icmp ne ptr %m, %lgp
  ret i1 %cmp
}

; Although %m is marked nocapture in the deopt operand of the call to function f,
; we cannot remove the alloc site: the call to malloc.
; The comparison should fold to false irrespective of whether the call to malloc can be elided or not.
declare void @f()
define i1 @compare_and_call_with_deopt() {
; CHECK-LABEL: @compare_and_call_with_deopt(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
; CHECK-NEXT:    tail call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 24)
  %lgp = load ptr, ptr @gp, align 8, !nonnull !0
  %cmp = icmp eq ptr %lgp, %m
  tail call void @f() [ "deopt"(ptr %m) ]
  ret i1 %cmp
}

; Same function as above with a deopt operand in the call to function f, but the comparison is NE.
define i1 @compare_ne_and_call_with_deopt() {
; CHECK-LABEL: @compare_ne_and_call_with_deopt(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
; CHECK-NEXT:    tail call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT:    ret i1 true
;
  %m = call ptr @malloc(i64 24)
  %lgp = load ptr, ptr @gp, align 8, !nonnull !0
  %cmp = icmp ne ptr %lgp, %m
  tail call void @f() [ "deopt"(ptr %m) ]
  ret i1 %cmp
}

; Same function as above, but the global is not marked nonnull, and we cannot fold the comparison.
define i1 @compare_ne_global_maybe_null() {
; CHECK-LABEL: @compare_ne_global_maybe_null(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
; CHECK-NEXT:    [[LGP:%.*]] = load ptr, ptr @gp, align 8
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr [[LGP]], [[M]]
; CHECK-NEXT:    tail call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT:    ret i1 [[CMP]]
;
  %m = call ptr @malloc(i64 24)
  %lgp = load ptr, ptr @gp
  %cmp = icmp ne ptr %lgp, %m
  tail call void @f() [ "deopt"(ptr %m) ]
  ret i1 %cmp
}

; FIXME: The comparison should fold to false since %m escapes (call to function escape)
; after the comparison.
declare void @escape(ptr)
define i1 @compare_and_call_after() {
; CHECK-LABEL: @compare_and_call_after(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
; CHECK-NEXT:    [[LGP:%.*]] = load ptr, ptr @gp, align 8, !nonnull [[META0:![0-9]+]]
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[M]], [[LGP]]
; CHECK-NEXT:    br i1 [[CMP]], label [[ESCAPE_CALL:%.*]], label [[JUST_RETURN:%.*]]
; CHECK:       escape_call:
; CHECK-NEXT:    call void @escape(ptr [[M]])
; CHECK-NEXT:    ret i1 true
; CHECK:       just_return:
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 24)
  %lgp = load ptr, ptr @gp, align 8, !nonnull !0
  %cmp = icmp eq ptr %m, %lgp
  br i1 %cmp, label %escape_call, label %just_return

escape_call:
  call void @escape(ptr %m)
  ret i1 true

just_return:
  ret i1 false
}

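; Two distinct, unescaped allocations can be assumed unequal, so the compare
; folds to false and both dead mallocs are removed.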
define i1 @compare_distinct_mallocs() {
; CHECK-LABEL: @compare_distinct_mallocs(
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 4)
  %n = call ptr @malloc(i64 4)
  %cmp = icmp eq ptr %m, %n
  ret i1 %cmp
}

; The compare is folded to true since the compare folding looks through bitcasts.
; The call to malloc and the bitcast instructions are then elided since the malloc has no other uses.
define i1 @compare_samepointer_under_bitcast() {
; CHECK-LABEL: @compare_samepointer_under_bitcast(
; CHECK-NEXT:    ret i1 true
;
  %m = call ptr @malloc(i64 4)
  %cmp = icmp eq ptr %m, %m
  ret i1 %cmp
}

; The compare is folded to true since the compare folding looks through bitcasts.
; The malloc call for %m cannot be elided since it is used in the call to function f.
define i1 @compare_samepointer_escaped() {
; CHECK-LABEL: @compare_samepointer_escaped(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT:    ret i1 true
;
  %m = call ptr @malloc(i64 4)
  %cmp = icmp eq ptr %m, %m
  call void @f() [ "deopt"(ptr %m) ]
  ret i1 %cmp
}

; Technically, we can fold the %cmp2 comparison, even though %m escapes through
; the ret statement, since `ret` terminates the function and we cannot reach
; the comparison again from that ret.
; FIXME: Folding this %cmp2 when %m escapes through ret could be an issue with
; cross-threading data dependencies since we do not make the distinction between
; atomic and non-atomic loads in capture tracking.
define ptr @compare_ret_escape(ptr %c) {
; CHECK-LABEL: @compare_ret_escape(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[N:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[N]], [[C:%.*]]
; CHECK-NEXT:    br i1 [[CMP]], label [[RETST:%.*]], label [[CHK:%.*]]
; CHECK:       retst:
; CHECK-NEXT:    ret ptr [[M]]
; CHECK:       chk:
; CHECK-NEXT:    [[LGP:%.*]] = load ptr, ptr @gp, align 8, !nonnull [[META0]]
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq ptr [[M]], [[LGP]]
; CHECK-NEXT:    br i1 [[CMP2]], label [[RETST]], label [[CHK2:%.*]]
; CHECK:       chk2:
; CHECK-NEXT:    ret ptr [[N]]
;
  %m = call ptr @malloc(i64 4)
  %n = call ptr @malloc(i64 4)
  %cmp = icmp eq ptr %n, %c
  br i1 %cmp, label %retst, label %chk

retst:
  ret ptr %m

chk:
  %lgp = load ptr, ptr @gp, align 8, !nonnull !0
  %cmp2 = icmp eq ptr %m, %lgp
  br i1 %cmp2, label %retst, label %chk2

chk2:
  ret ptr %n
}

; The malloc call for %m cannot be elided since it is used in the call to function f.
; However, the cmp can be folded to true as %n doesn't escape and %m, %n are distinct allocations.
define i1 @compare_distinct_pointer_escape() {
; CHECK-LABEL: @compare_distinct_pointer_escape(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    tail call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT:    ret i1 true
;
  %m = call ptr @malloc(i64 4)
  %n = call ptr @malloc(i64 4)
  tail call void @f() [ "deopt"(ptr %m) ]
  %cmp = icmp ne ptr %m, %n
  ret i1 %cmp
}

; The next block of tests demonstrates a very subtle correctness requirement.
; We can generally assume any *single* heap layout we choose for the result of
; a malloc call, but we can't simultaneously assume two different ones. As a
; result, we must make sure that we only fold conditions if we can ensure that
; we fold *all* potentially address-capturing compares the same way. This is
; the same point that applies to allocas, applied to noalias/malloc.

; These two functions represent either a) forging a pointer via inttoptr or
; b) indexing off an adjacent allocation. In either case, the operation is
; obscured by an uninlined helper and not visible to instcombine.
declare ptr @hidden_inttoptr()
declare ptr @hidden_offset(ptr %other)

; FIXME: Missed opportunity
define i1 @ptrtoint_single_cmp() {
; CHECK-LABEL: @ptrtoint_single_cmp(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[M]], inttoptr (i64 2048 to ptr)
; CHECK-NEXT:    ret i1 [[CMP]]
;
  %m = call ptr @malloc(i64 4)
  %rhs = inttoptr i64 2048 to ptr
  %cmp = icmp eq ptr %m, %rhs
  ret i1 %cmp
}

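; With only a single compare visible, we can pick a heap layout in which %m
; and the offset from %n are distinct, so the compare folds to false.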
define i1 @offset_single_cmp() {
; CHECK-LABEL: @offset_single_cmp(
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 4)
  %n = call ptr @malloc(i64 4)
  %rhs = getelementptr i8, ptr %n, i32 4
  %cmp = icmp eq ptr %m, %rhs
  ret i1 %cmp
}

declare void @witness(i1, i1)

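; Negative test: one compare uses a visible inttoptr constant while the other
; uses a potentially equal pointer hidden behind @hidden_inttoptr. Folding only
; the visible compare could amount to assuming two different heap layouts, so
; neither compare is folded.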
define void @neg_consistent_fold1() {
; CHECK-LABEL: @neg_consistent_fold1(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[RHS2:%.*]] = call ptr @hidden_inttoptr()
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq ptr [[M]], inttoptr (i64 2048 to ptr)
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq ptr [[M]], [[RHS2]]
; CHECK-NEXT:    call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT:    ret void
;
  %m = call ptr @malloc(i64 4)
  %rhs = inttoptr i64 2048 to ptr
  %rhs2 = call ptr @hidden_inttoptr()
  %cmp1 = icmp eq ptr %m, %rhs
  %cmp2 = icmp eq ptr %m, %rhs2
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

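; Same as above, but the potentially equal pointer is an offset from the
; adjacent allocation %n, visible in one compare and hidden behind
; @hidden_offset in the other.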
define void @neg_consistent_fold2() {
; CHECK-LABEL: @neg_consistent_fold2(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[N:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[RHS:%.*]] = getelementptr i8, ptr [[N]], i64 4
; CHECK-NEXT:    [[RHS2:%.*]] = call ptr @hidden_offset(ptr [[N]])
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq ptr [[M]], [[RHS]]
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq ptr [[M]], [[RHS2]]
; CHECK-NEXT:    call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT:    ret void
;
  %m = call ptr @malloc(i64 4)
  %n = call ptr @malloc(i64 4)
  %rhs = getelementptr i8, ptr %n, i32 4
  %rhs2 = call ptr @hidden_offset(ptr %n)
  %cmp1 = icmp eq ptr %m, %rhs
  %cmp2 = icmp eq ptr %m, %rhs2
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

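; Same as above, with the visible compare operand loaded from the global @gp
; and the hidden one produced by @hidden_inttoptr.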
define void @neg_consistent_fold3() {
; CHECK-LABEL: @neg_consistent_fold3(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[LGP:%.*]] = load ptr, ptr @gp, align 8
; CHECK-NEXT:    [[RHS2:%.*]] = call ptr @hidden_inttoptr()
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq ptr [[M]], [[LGP]]
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq ptr [[M]], [[RHS2]]
; CHECK-NEXT:    call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT:    ret void
;
  %m = call ptr @malloc(i64 4)
  %lgp = load ptr, ptr @gp, align 8
  %rhs2 = call ptr @hidden_inttoptr()
  %cmp1 = icmp eq ptr %m, %lgp
  %cmp2 = icmp eq ptr %m, %rhs2
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

; FIXME: This appears correct, but the current implementation relies
; on visiting both cmps in the same pass. We may have a simplification order
; under which one is missed, and that would be a bug.
define void @neg_consistent_fold4() {
; CHECK-LABEL: @neg_consistent_fold4(
; CHECK-NEXT:    call void @witness(i1 false, i1 false)
; CHECK-NEXT:    ret void
;
  %m = call ptr @malloc(i64 4)
  %lgp = load ptr, ptr @gp, align 8
  %cmp1 = icmp eq ptr %m, %lgp
  %cmp2 = icmp eq ptr %m, %lgp
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

declare void @unknown(ptr)

; A nocapture call can't cause a consistency issue for these folds, as it is
; (by assumption) unable to contain a comparison which might capture the
; address.
define i1 @consistent_nocapture_inttoptr() {
; CHECK-LABEL: @consistent_nocapture_inttoptr(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    call void @unknown(ptr nocapture [[M]])
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[M]], inttoptr (i64 2048 to ptr)
; CHECK-NEXT:    ret i1 [[CMP]]
;
  %m = call ptr @malloc(i64 4)
  call void @unknown(ptr nocapture %m)
  %rhs = inttoptr i64 2048 to ptr
  %cmp = icmp eq ptr %m, %rhs
  ret i1 %cmp
}

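; The nocapture call does not block the fold: the compare against an offset
; from a second allocation still folds to false.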
define i1 @consistent_nocapture_offset() {
; CHECK-LABEL: @consistent_nocapture_offset(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    call void @unknown(ptr nocapture [[M]])
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 4)
  call void @unknown(ptr nocapture %m)
  %n = call ptr @malloc(i64 4)
  %rhs = getelementptr i8, ptr %n, i32 4
  %cmp = icmp eq ptr %m, %rhs
  ret i1 %cmp
}

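; Likewise through a nonnull pointer loaded from @gp; the compare still folds
; to false despite the nocapture call.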
define i1 @consistent_nocapture_through_global() {
; CHECK-LABEL: @consistent_nocapture_through_global(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    call void @unknown(ptr nocapture [[M]])
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 4)
  call void @unknown(ptr nocapture %m)
  %lgp = load ptr, ptr @gp, align 8, !nonnull !0
  %cmp = icmp eq ptr %m, %lgp
  ret i1 %cmp
}

; End consistent heap layout tests

; We can fold this by assuming a single heap layout.
define i1 @two_nonnull_mallocs() {
; CHECK-LABEL: @two_nonnull_mallocs(
; CHECK-NEXT:    ret i1 false
;
  %m = call nonnull ptr @malloc(i64 4)
  %n = call nonnull ptr @malloc(i64 4)
  %cmp = icmp eq ptr %m, %n
  ret i1 %cmp
}

; The address of %n is captured, but %m can be arranged to make
; the comparison non-equal.
define i1 @two_nonnull_mallocs2() {
; CHECK-LABEL: @two_nonnull_mallocs2(
; CHECK-NEXT:    [[N:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4)
; CHECK-NEXT:    call void @unknown(ptr nonnull [[N]])
; CHECK-NEXT:    ret i1 false
;
  %m = call nonnull ptr @malloc(i64 4)
  %n = call nonnull ptr @malloc(i64 4)
  call void @unknown(ptr %n)
  %cmp = icmp eq ptr %m, %n
  ret i1 %cmp
}

; TODO: We can fold this, but don't with the current scheme.
define i1 @two_nonnull_mallocs_hidden() {
; CHECK-LABEL: @two_nonnull_mallocs_hidden(
; CHECK-NEXT:    [[M:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[N:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr [[M]], i64 1
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds nuw i8, ptr [[N]], i64 2
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[GEP1]], [[GEP2]]
; CHECK-NEXT:    ret i1 [[CMP]]
;
  %m = call nonnull ptr @malloc(i64 4)
  %n = call nonnull ptr @malloc(i64 4)
  %gep1 = getelementptr i8, ptr %m, i32 1
  %gep2 = getelementptr i8, ptr %n, i32 2
  %cmp = icmp eq ptr %gep1, %gep2
  ret i1 %cmp
}

!0 = !{}