; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S < %s -basic-aa -gvn -dce | FileCheck %s

; Analyze Load from clobbering Load.

define <vscale x 4 x i32> @load_store_clobber_load(<vscale x 4 x i32>* %p) {
; CHECK-LABEL: @load_store_clobber_load(
; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], align 16
; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* undef, align 16
; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD1]]
; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
;
  %load1 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* undef
  %load2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p ; <- load to be eliminated
  %add = add <vscale x 4 x i32> %load1, %load2
  ret <vscale x 4 x i32> %add
}

define <vscale x 4 x i32> @load_store_clobber_load_mayalias(<vscale x 4 x i32>* %p, <vscale x 4 x i32>* %p2) {
; CHECK-LABEL: @load_store_clobber_load_mayalias(
; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], align 16
; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P2:%.*]], align 16
; CHECK-NEXT:    [[LOAD2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]], align 16
; CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
; CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
;
  %load1 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %p2
  %load2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
  %sub = sub <vscale x 4 x i32> %load1, %load2
  ret <vscale x 4 x i32> %sub
}

define <vscale x 4 x i32> @load_store_clobber_load_noalias(<vscale x 4 x i32>* noalias %p, <vscale x 4 x i32>* noalias %p2) {
; CHECK-LABEL: @load_store_clobber_load_noalias(
; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], align 16
; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P2:%.*]], align 16
; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD1]]
; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
;
  %load1 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %p2
  %load2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p ; <- load to be eliminated
  %add = add <vscale x 4 x i32> %load1, %load2
  ret <vscale x 4 x i32> %add
}

; TODO: BasicAA returns MayAlias for %gep1 and %gep2; this could be improved to MustAlias.
define i32 @load_clobber_load_gep1(<vscale x 4 x i32>* %p) {
; CHECK-LABEL: @load_clobber_load_gep1(
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 0, i64 1
; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[GEP1]], align 4
; CHECK-NEXT:    [[P2:%.*]] = bitcast <vscale x 4 x i32>* [[P]] to i32*
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i32, i32* [[P2]], i64 1
; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, i32* [[GEP2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD2]]
; CHECK-NEXT:    ret i32 [[ADD]]
;
  %gep1 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 0, i64 1
  %load1 = load i32, i32* %gep1
  %p2 = bitcast <vscale x 4 x i32>* %p to i32*
  %gep2 = getelementptr i32, i32* %p2, i64 1
  %load2 = load i32, i32* %gep2 ; <- load could be eliminated
  %add = add i32 %load1, %load2
  ret i32 %add
}

define i32 @load_clobber_load_gep2(<vscale x 4 x i32>* %p) {
; CHECK-LABEL: @load_clobber_load_gep2(
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 1, i64 0
; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[GEP1]], align 4
; CHECK-NEXT:    [[P2:%.*]] = bitcast <vscale x 4 x i32>* [[P]] to i32*
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i32, i32* [[P2]], i64 4
; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, i32* [[GEP2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD2]]
; CHECK-NEXT:    ret i32 [[ADD]]
;
  %gep1 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 0
  %load1 = load i32, i32* %gep1
  %p2 = bitcast <vscale x 4 x i32>* %p to i32*
  %gep2 = getelementptr i32, i32* %p2, i64 4
  %load2 = load i32, i32* %gep2 ; <- cannot determine at compile time whether %load1 and %load2 load from the same address
  %add = add i32 %load1, %load2
  ret i32 %add
}

; TODO: BasicAA returns MayAlias for %gep1 and %gep2; this could be improved to MustAlias.
define i32 @load_clobber_load_gep3(<vscale x 4 x i32>* %p) {
; CHECK-LABEL: @load_clobber_load_gep3(
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 1, i64 0
; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[GEP1]], align 4
; CHECK-NEXT:    [[P2:%.*]] = bitcast <vscale x 4 x i32>* [[P]] to <vscale x 4 x float>*
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[P2]], i64 1, i64 0
; CHECK-NEXT:    [[LOAD2:%.*]] = load float, float* [[GEP2]], align 4
; CHECK-NEXT:    [[CAST:%.*]] = bitcast float [[LOAD2]] to i32
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[CAST]]
; CHECK-NEXT:    ret i32 [[ADD]]
;
  %gep1 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 0
  %load1 = load i32, i32* %gep1
  %p2 = bitcast <vscale x 4 x i32>* %p to <vscale x 4 x float>*
  %gep2 = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %p2, i64 1, i64 0
  %load2 = load float, float* %gep2 ; <- load could be eliminated
  %cast = bitcast float %load2 to i32
  %add = add i32 %load1, %cast
  ret i32 %add
}

define <vscale x 4 x i32> @load_clobber_load_fence(<vscale x 4 x i32>* %p) {
; CHECK-LABEL: @load_clobber_load_fence(
; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], align 16
; CHECK-NEXT:    call void asm "", "~{memory}"()
; CHECK-NEXT:    [[LOAD2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]], align 16
; CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
; CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
;
  %load1 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
  call void asm "", "~{memory}"()
  %load2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
  %sub = sub <vscale x 4 x i32> %load1, %load2
  ret <vscale x 4 x i32> %sub
}

define <vscale x 4 x i32> @load_clobber_load_sideeffect(<vscale x 4 x i32>* %p) {
; CHECK-LABEL: @load_clobber_load_sideeffect(
; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], align 16
; CHECK-NEXT:    call void asm sideeffect "", ""()
; CHECK-NEXT:    [[LOAD2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]], align 16
; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
;
  %load1 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
  call void asm sideeffect "", ""()
  %load2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
  %add = add <vscale x 4 x i32> %load1, %load2
  ret <vscale x 4 x i32> %add
}

; Analyze Load from clobbering Store.

define <vscale x 4 x i32> @store_forward_to_load(<vscale x 4 x i32>* %p) {
; CHECK-LABEL: @store_forward_to_load(
; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P:%.*]], align 16
; CHECK-NEXT:    ret <vscale x 4 x i32> zeroinitializer
;
  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %p
  %load = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @store_forward_to_load_sideeffect(<vscale x 4 x i32>* %p) {
; CHECK-LABEL: @store_forward_to_load_sideeffect(
; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P:%.*]], align 16
; CHECK-NEXT:    call void asm sideeffect "", ""()
; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]], align 16
; CHECK-NEXT:    ret <vscale x 4 x i32> [[LOAD]]
;
  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %p
  call void asm sideeffect "", ""()
  %load = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
  ret <vscale x 4 x i32> %load
}

define i32 @store_clobber_load() {
; CHECK-LABEL: @store_clobber_load(
; CHECK-NEXT:    [[ALLOC:%.*]] = alloca <vscale x 4 x i32>
; CHECK-NEXT:    store <vscale x 4 x i32> undef, <vscale x 4 x i32>* [[ALLOC]], align 16
; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[ALLOC]], i32 0, i32 1
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[PTR]], align 4
; CHECK-NEXT:    ret i32 [[LOAD]]
;
  %alloc = alloca <vscale x 4 x i32>
  store <vscale x 4 x i32> undef, <vscale x 4 x i32>* %alloc
  %ptr = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %alloc, i32 0, i32 1
  %load = load i32, i32* %ptr
  ret i32 %load
}

; Analyze Load from clobbering MemInst.

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)

define i32 @memset_clobber_load(<vscale x 4 x i32>* %p) {
; CHECK-LABEL: @memset_clobber_load(
; CHECK-NEXT:    [[CONV:%.*]] = bitcast <vscale x 4 x i32>* [[P:%.*]] to i8*
; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* [[CONV]], i8 1, i64 200, i1 false)
; CHECK-NEXT:    ret i32 16843009
;
  %conv = bitcast <vscale x 4 x i32>* %p to i8*
  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i1 false)
  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 0, i64 5
  %load = load i32, i32* %gep
  ret i32 %load
}

define i32 @memset_clobber_load_vscaled_base(<vscale x 4 x i32>* %p) {
; CHECK-LABEL: @memset_clobber_load_vscaled_base(
; CHECK-NEXT:    [[CONV:%.*]] = bitcast <vscale x 4 x i32>* [[P:%.*]] to i8*
; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* [[CONV]], i8 1, i64 200, i1 false)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]], i64 1, i64 1
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[GEP]], align 4
; CHECK-NEXT:    ret i32 [[LOAD]]
;
  %conv = bitcast <vscale x 4 x i32>* %p to i8*
  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i1 false)
  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 1
  %load = load i32, i32* %gep
  ret i32 %load
}

define i32 @memset_clobber_load_nonconst_index(<vscale x 4 x i32>* %p, i64 %idx1, i64 %idx2) {
; CHECK-LABEL: @memset_clobber_load_nonconst_index(
; CHECK-NEXT:    [[CONV:%.*]] = bitcast <vscale x 4 x i32>* [[P:%.*]] to i8*
; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* [[CONV]], i8 1, i64 200, i1 false)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]], i64 [[IDX1:%.*]], i64 [[IDX2:%.*]]
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[GEP]], align 4
; CHECK-NEXT:    ret i32 [[LOAD]]
;
  %conv = bitcast <vscale x 4 x i32>* %p to i8*
  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i1 false)
  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 %idx1, i64 %idx2
  %load = load i32, i32* %gep
  ret i32 %load
}

; Load elimination across BBs

define <vscale x 4 x i32>* @load_from_alloc_replaced_with_undef() {
; CHECK-LABEL: @load_from_alloc_replaced_with_undef(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A:%.*]] = alloca <vscale x 4 x i32>
; CHECK-NEXT:    br i1 undef, label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[A]], align 16
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    ret <vscale x 4 x i32>* [[A]]
;
entry:
  %a = alloca <vscale x 4 x i32>
  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %a, i64 0, i64 1
  %load = load i32, i32* %gep ; <- load to be eliminated
  %tobool = icmp eq i32 %load, 0 ; <- icmp to be eliminated
  br i1 %tobool, label %if.end, label %if.then

if.then:
  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %a
  br label %if.end

if.end:
  ret <vscale x 4 x i32>* %a
}

define i32 @redundant_load_elimination_1(<vscale x 4 x i32>* %p) {
; CHECK-LABEL: @redundant_load_elimination_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 1, i64 1
; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[GEP]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[LOAD1]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    ret i32 [[LOAD1]]
;
entry:
  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 1
  %load1 = load i32, i32* %gep
  %cmp = icmp eq i32 %load1, 0
  br i1 %cmp, label %if.then, label %if.end

if.then:
  %load2 = load i32, i32* %gep ; <- load to be eliminated
  %add = add i32 %load1, %load2
  br label %if.end

if.end:
  %result = phi i32 [ %add, %if.then ], [ %load1, %entry ]
  ret i32 %result
}

; TODO: BasicAA returns MayAlias for %gep1 and %gep2; this could be improved to NoAlias.
define void @redundant_load_elimination_2(i1 %c, <vscale x 4 x i32>* %p, i32* %q, <vscale x 4 x i32> %v) {
; CHECK-LABEL: @redundant_load_elimination_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 1, i64 1
; CHECK-NEXT:    store i32 0, i32* [[GEP1]], align 4
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]], i64 1, i64 0
; CHECK-NEXT:    store i32 1, i32* [[GEP2]], align 4
; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[T:%.*]] = load i32, i32* [[GEP1]], align 4
; CHECK-NEXT:    store i32 [[T]], i32* [[Q:%.*]], align 4
; CHECK-NEXT:    ret void
; CHECK:       if.else:
; CHECK-NEXT:    ret void
;
entry:
  %gep1 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 1
  store i32 0, i32* %gep1
  %gep2 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 0
  store i32 1, i32* %gep2
  br i1 %c, label %if.else, label %if.then

if.then:
  %t = load i32, i32* %gep1 ; <- load could be eliminated
  store i32 %t, i32* %q
  ret void

if.else:
  ret void
}

; TODO: The load in if.then could have been eliminated.
define void @missing_load_elimination(i1 %c, <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %q, <vscale x 4 x i32> %v) {
; CHECK-LABEL: @missing_load_elimination(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P:%.*]], align 16
; CHECK-NEXT:    [[P1:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]], i64 1
; CHECK-NEXT:    store <vscale x 4 x i32> [[V:%.*]], <vscale x 4 x i32>* [[P1]], align 16
; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[T:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]], align 16
; CHECK-NEXT:    store <vscale x 4 x i32> [[T]], <vscale x 4 x i32>* [[Q:%.*]], align 16
; CHECK-NEXT:    ret void
; CHECK:       if.else:
; CHECK-NEXT:    ret void
;
entry:
  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %p
  %p1 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1
  store <vscale x 4 x i32> %v, <vscale x 4 x i32>* %p1
  br i1 %c, label %if.else, label %if.then

if.then:
  %t = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p ; load could be eliminated
  store <vscale x 4 x i32> %t, <vscale x 4 x i32>* %q
  ret void

if.else:
  ret void
}