; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=instcombine < %s | FileCheck %s --check-prefixes=CHECK,LITTLE
; RUN: opt -S -passes=instcombine -data-layout="E" < %s | FileCheck %s --check-prefixes=CHECK,BIG
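
; 258 is 0x0102, so forwarding the first stored byte yields 2 on little-endian
; targets and 1 on big-endian targets.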
define i8 @load_smaller_int(ptr %p) {
; LITTLE-LABEL: @load_smaller_int(
; LITTLE-NEXT:    store i16 258, ptr [[P:%.*]], align 2
; LITTLE-NEXT:    ret i8 2
;
; BIG-LABEL: @load_smaller_int(
; BIG-NEXT:    store i16 258, ptr [[P:%.*]], align 2
; BIG-NEXT:    ret i8 1
;
  store i16 258, ptr %p
  %load = load i8, ptr %p
  ret i8 %load
}

; This case can *not* be forwarded, as we only see part of the stored value.
define i32 @load_larger_int(ptr %p) {
; CHECK-LABEL: @load_larger_int(
; CHECK-NEXT:    store i16 258, ptr [[P:%.*]], align 2
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[P]], align 4
; CHECK-NEXT:    ret i32 [[LOAD]]
;
  store i16 258, ptr %p
  %load = load i32, ptr %p
  ret i32 %load
}

define i32 @vec_store_load_first(ptr %p) {
; CHECK-LABEL: @vec_store_load_first(
; CHECK-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr [[P:%.*]], align 8
; CHECK-NEXT:    ret i32 1
;
  store <2 x i32> <i32 1, i32 2>, ptr %p
  %load = load i32, ptr %p
  ret i32 %load
}

define i17 @vec_store_load_first_odd_size(ptr %p) {
; CHECK-LABEL: @vec_store_load_first_odd_size(
; CHECK-NEXT:    store <2 x i17> <i17 1, i17 2>, ptr [[P:%.*]], align 8
; CHECK-NEXT:    [[LOAD:%.*]] = load i17, ptr [[P]], align 4
; CHECK-NEXT:    ret i17 [[LOAD]]
;
  store <2 x i17> <i17 1, i17 2>, ptr %p
  %load = load i17, ptr %p
  ret i17 %load
}

define i32 @vec_store_load_first_constexpr(ptr %p) {
; CHECK-LABEL: @vec_store_load_first_constexpr(
; CHECK-NEXT:    store <2 x i32> bitcast (i64 ptrtoint (ptr @vec_store_load_first to i64) to <2 x i32>), ptr [[P:%.*]], align 8
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[P]], align 4
; CHECK-NEXT:    ret i32 [[LOAD]]
;
  store <2 x i32> bitcast (i64 ptrtoint (ptr @vec_store_load_first to i64) to <2 x i32>), ptr %p, align 8
  %load = load i32, ptr %p, align 4
  ret i32 %load
}

define i32 @vec_store_load_second(ptr %p) {
; CHECK-LABEL: @vec_store_load_second(
; CHECK-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr [[P:%.*]], align 8
; CHECK-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 4
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[P3]], align 4
; CHECK-NEXT:    ret i32 [[LOAD]]
;
  store <2 x i32> <i32 1, i32 2>, ptr %p
  %p3 = getelementptr i32, ptr %p, i64 1
  %load = load i32, ptr %p3
  ret i32 %load
}
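
; The expected constants below follow from the byte layout of <i32 1, i32 2>:
; a little-endian i64 load sees 0x0000000200000001 = 8589934593, a big-endian
; load sees 0x0000000100000002 = 4294967298.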
define i64 @vec_store_load_whole(ptr %p) {
; LITTLE-LABEL: @vec_store_load_whole(
; LITTLE-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr [[P:%.*]], align 8
; LITTLE-NEXT:    ret i64 8589934593
;
; BIG-LABEL: @vec_store_load_whole(
; BIG-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr [[P:%.*]], align 8
; BIG-NEXT:    ret i64 4294967298
;
  store <2 x i32> <i32 1, i32 2>, ptr %p
  %load = load i64, ptr %p
  ret i64 %load
}

define i32 @vec_store_load_overlap(ptr %p) {
; CHECK-LABEL: @vec_store_load_overlap(
; CHECK-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr [[P:%.*]], align 8
; CHECK-NEXT:    [[P4:%.*]] = getelementptr i8, ptr [[P]], i64 2
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[P4]], align 2
; CHECK-NEXT:    ret i32 [[LOAD]]
;
  store <2 x i32> <i32 1, i32 2>, ptr %p
  %p4 = getelementptr i8, ptr %p, i64 2
  %load = load i32, ptr %p4, align 2
  ret i32 %load
}

define i32 @load_i32_store_nxv4i32(ptr %a) {
; CHECK-LABEL: @load_i32_store_nxv4i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    ret i32 [[TMP0]]
;
entry:
  store <vscale x 4 x i32> splat (i32 1), ptr %a, align 16
  %0 = load i32, ptr %a, align 4
  ret i32 %0
}

define i64 @load_i64_store_nxv8i8(ptr %a) {
; CHECK-LABEL: @load_i64_store_nxv8i8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 8 x i8> splat (i8 1), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    ret i64 [[LOAD]]
;
entry:
  store <vscale x 8 x i8> splat (i8 1), ptr %a, align 16
  %load = load i64, ptr %a, align 8
  ret i64 %load
}

define i64 @load_i64_store_nxv4i32(ptr %a) {
; CHECK-LABEL: @load_i64_store_nxv4i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    ret i64 [[LOAD]]
;
entry:
  store <vscale x 4 x i32> splat (i32 1), ptr %a, align 16
  %load = load i64, ptr %a, align 8
  ret i64 %load
}

define i8 @load_i8_store_nxv4i32(ptr %a) {
; CHECK-LABEL: @load_i8_store_nxv4i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[A]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
entry:
  store <vscale x 4 x i32> splat (i32 1), ptr %a, align 16
  %load = load i8, ptr %a, align 1
  ret i8 %load
}

define float @load_f32_store_nxv4f32(ptr %a) {
; CHECK-LABEL: @load_f32_store_nxv4f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x float> splat (float 1.000000e+00), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT:    ret float [[TMP0]]
;
entry:
  store <vscale x 4 x float> splat (float 1.0), ptr %a, align 16
  %0 = load float, ptr %a, align 4
  ret float %0
}

define i32 @load_i32_store_nxv4f32(ptr %a) {
; CHECK-LABEL: @load_i32_store_nxv4f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x float> splat (float 1.000000e+00), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    ret i32 [[LOAD]]
;
entry:
  store <vscale x 4 x float> splat (float 1.0), ptr %a, align 16
  %load = load i32, ptr %a, align 4
  ret i32 %load
}

define <4 x i32> @load_v4i32_store_nxv4i32(ptr %a) {
; CHECK-LABEL: @load_v4i32_store_nxv4i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    ret <4 x i32> [[TMP0]]
;
entry:
  store <vscale x 4 x i32> splat (i32 1), ptr %a, align 16
  %0 = load <4 x i32>, ptr %a, align 16
  ret <4 x i32> %0
}

define <4 x i16> @load_v4i16_store_nxv4i32(ptr %a) {
; CHECK-LABEL: @load_v4i16_store_nxv4i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    ret <4 x i16> [[TMP0]]
;
entry:
  store <vscale x 4 x i32> splat (i32 1), ptr %a, align 16
  %0 = load <4 x i16>, ptr %a, align 16
  ret <4 x i16> %0
}

; Loaded data type exceeds the known minimum size of the store.
define i64 @load_i64_store_nxv4i8(ptr %a) {
; CHECK-LABEL: @load_i64_store_nxv4i8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x i8> splat (i8 1), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    ret i64 [[LOAD]]
;
entry:
  store <vscale x 4 x i8> splat (i8 1), ptr %a, align 16
  %load = load i64, ptr %a, align 8
  ret i64 %load
}

; Loaded data size is unknown - we cannot guarantee it won't
; exceed the store size.
define <vscale x 4 x i8> @load_nxv4i8_store_nxv4i32(ptr %a) {
; CHECK-LABEL: @load_nxv4i8_store_nxv4i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 4 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
;
entry:
  store <vscale x 4 x i32> splat (i32 1), ptr %a, align 16
  %0 = load <vscale x 4 x i8>, ptr %a, align 16
  ret <vscale x 4 x i8> %0
}

define i8 @load_i8_store_i1(ptr %a) {
; CHECK-LABEL: @load_i8_store_i1(
; CHECK-NEXT:    store i1 true, ptr [[A:%.*]], align 1
; CHECK-NEXT:    [[V:%.*]] = load i8, ptr [[A]], align 1
; CHECK-NEXT:    ret i8 [[V]]
;
  store i1 true, ptr %a
  %v = load i8, ptr %a
  ret i8 %v
}

define i1 @load_i1_store_i8(ptr %a) {
; CHECK-LABEL: @load_i1_store_i8(
; CHECK-NEXT:    store i8 1, ptr [[A:%.*]], align 1
; CHECK-NEXT:    ret i1 true
;
  store i8 1, ptr %a
  %v = load i1, ptr %a
  ret i1 %v
}

define i32 @load_after_memset_0(ptr %a) {
; CHECK-LABEL: @load_after_memset_0(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    ret i32 0
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load i32, ptr %a
  ret i32 %v
}

define float @load_after_memset_0_float(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_float(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    ret float 0.000000e+00
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load float, ptr %a
  ret float %v
}

define i27 @load_after_memset_0_non_byte_sized(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_non_byte_sized(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    ret i27 0
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load i27, ptr %a
  ret i27 %v
}

define i1 @load_after_memset_0_i1(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_i1(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    ret i1 false
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load i1, ptr %a
  ret i1 %v
}

define <4 x i8> @load_after_memset_0_vec(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_vec(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    ret <4 x i8> zeroinitializer
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load <4 x i8>, ptr %a
  ret <4 x i8> %v
}
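
; For the memset(1) tests below, four 0x01 bytes give the pattern 0x01010101 =
; 16843009; 0x3820202020000000 is that same 32-bit pattern interpreted as a
; float, printed in LLVM's double hex notation.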
define i32 @load_after_memset_1(ptr %a) {
; CHECK-LABEL: @load_after_memset_1(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false)
; CHECK-NEXT:    ret i32 16843009
;
  call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false)
  %v = load i32, ptr %a
  ret i32 %v
}

define float @load_after_memset_1_float(ptr %a) {
; CHECK-LABEL: @load_after_memset_1_float(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false)
; CHECK-NEXT:    ret float 0x3820202020000000
;
  call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false)
  %v = load float, ptr %a
  ret float %v
}

define i27 @load_after_memset_1_non_byte_sized(ptr %a) {
; CHECK-LABEL: @load_after_memset_1_non_byte_sized(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false)
; CHECK-NEXT:    ret i27 16843009
;
  call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false)
  %v = load i27, ptr %a
  ret i27 %v
}

define i1 @load_after_memset_1_i1(ptr %a) {
; CHECK-LABEL: @load_after_memset_1_i1(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false)
; CHECK-NEXT:    ret i1 true
;
  call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false)
  %v = load i1, ptr %a
  ret i1 %v
}

define <4 x i8> @load_after_memset_1_vec(ptr %a) {
; CHECK-LABEL: @load_after_memset_1_vec(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false)
; CHECK-NEXT:    ret <4 x i8> splat (i8 1)
;
  call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false)
  %v = load <4 x i8>, ptr %a
  ret <4 x i8> %v
}

define i32 @load_after_memset_unknown(ptr %a, i8 %byte) {
; CHECK-LABEL: @load_after_memset_unknown(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 [[BYTE:%.*]], i64 16, i1 false)
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    ret i32 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 %byte, i64 16, i1 false)
  %v = load i32, ptr %a
  ret i32 %v
}

; TODO: Handle load at offset.
define i32 @load_after_memset_0_offset(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_offset(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 4
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT:    ret i32 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %gep = getelementptr i8, ptr %a, i64 4
  %v = load i32, ptr %gep
  ret i32 %v
}

define i32 @load_after_memset_0_offset_too_large(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_offset_too_large(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 13
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT:    ret i32 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %gep = getelementptr i8, ptr %a, i64 13
  %v = load i32, ptr %gep
  ret i32 %v
}

define i32 @load_after_memset_0_offset_negative(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_offset_negative(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 -1
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT:    ret i32 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %gep = getelementptr i8, ptr %a, i64 -1
  %v = load i32, ptr %gep
  ret i32 %v
}

define i32 @load_after_memset_0_clobber(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_clobber(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    store i8 1, ptr [[A]], align 1
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    ret i32 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  store i8 1, ptr %a
  %v = load i32, ptr %a
  ret i32 %v
}

define i256 @load_after_memset_0_too_small(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_too_small(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[V:%.*]] = load i256, ptr [[A]], align 4
; CHECK-NEXT:    ret i256 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load i256, ptr %a
  ret i256 %v
}

define i129 @load_after_memset_0_too_small_by_one_bit(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_too_small_by_one_bit(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[V:%.*]] = load i129, ptr [[A]], align 4
; CHECK-NEXT:    ret i129 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load i129, ptr %a
  ret i129 %v
}

define i32 @load_after_memset_0_unknown_length(ptr %a, i64 %len) {
; CHECK-LABEL: @load_after_memset_0_unknown_length(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[A:%.*]], i8 0, i64 [[LEN:%.*]], i1 false)
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    ret i32 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 %len, i1 false)
  %v = load i32, ptr %a
  ret i32 %v
}

define i32 @load_after_memset_0_atomic(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_atomic(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[V:%.*]] = load atomic i32, ptr [[A]] seq_cst, align 4
; CHECK-NEXT:    ret i32 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load atomic i32, ptr %a seq_cst, align 4
  ret i32 %v
}

define <vscale x 1 x i32> @load_after_memset_0_scalable(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_scalable(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[V:%.*]] = load <vscale x 1 x i32>, ptr [[A]], align 4
; CHECK-NEXT:    ret <vscale x 1 x i32> [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load <vscale x 1 x i32>, ptr %a
  ret <vscale x 1 x i32> %v
}

declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)