1 ; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
3 target triple = "x86_64-apple-darwin10.0.0"
; Byte-wise zero fill: each iteration stores i8 0 to %Base[%indvar], and the loop
; exits once %indvar.next equals %Size. The expected output listed at the end of
; this block is a single llvm.memset of %Size bytes starting at %Base.
5 define void @test1(i8* %Base, i64 %Size) nounwind ssp {
6 bb.nph: ; preds = %entry
9 for.body: ; preds = %bb.nph, %for.body
10 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
11 %I.0.014 = getelementptr i8* %Base, i64 %indvar
12 store i8 0, i8* %I.0.014, align 1
13 %indvar.next = add i64 %indvar, 1
14 %exitcond = icmp eq i64 %indvar.next, %Size
15 br i1 %exitcond, label %for.end, label %for.body
17 for.end: ; preds = %for.body, %entry
20 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
24 ; This is a loop that was rotated but where the blocks weren't merged. This
25 ; shouldn't perturb us.
; Same zero-fill as a single-block loop, except that the store block ends in an
; unconditional branch and the exit compare/branch sits in the block that follows.
; The back edge into for.body therefore comes from %for.body.cont (see the phi).
26 define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
27 bb.nph: ; preds = %entry
30 for.body: ; preds = %bb.nph, %for.body.cont
31 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
32 %I.0.014 = getelementptr i8* %Base, i64 %indvar
33 store i8 0, i8* %I.0.014, align 1
34 %indvar.next = add i64 %indvar, 1
35 br label %for.body.cont
37 %exitcond = icmp eq i64 %indvar.next, %Size
38 br i1 %exitcond, label %for.end, label %for.body
40 for.end: ; preds = %for.body, %entry
43 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
; Fills an i32 array with 16843009 (0x01010101). Every byte of that value is 1,
; so the expected output is a memset with byte value 1 whose length is
; %Size * 4 bytes, emitted after the %cmp10 zero-trip guard.
48 define void @test2(i32* %Base, i64 %Size) nounwind ssp {
; Skip the loop entirely when %Size is zero.
50 %cmp10 = icmp eq i64 %Size, 0
51 br i1 %cmp10, label %for.end, label %for.body
53 for.body: ; preds = %entry, %for.body
54 %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
55 %add.ptr.i = getelementptr i32* %Base, i64 %i.011
56 store i32 16843009, i32* %add.ptr.i, align 4
57 %inc = add nsw i64 %i.011, 1
58 %exitcond = icmp eq i64 %inc, %Size
59 br i1 %exitcond, label %for.end, label %for.body
61 for.end: ; preds = %for.body, %entry
64 ; CHECK: br i1 %cmp10,
65 ; CHECK: %tmp = mul i64 %Size, 4
66 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %tmp, i32 4, i1 false)
70 ; This is a case where there is an extra may-aliased store in the loop, we can't
72 define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp {
76 for.body: ; preds = %entry, %for.body
77 %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
78 %add.ptr.i = getelementptr i32* %Base, i64 %i.011
; Same 0x01010101 fill pattern as a memset-able loop would use...
79 store i32 16843009, i32* %add.ptr.i, align 4
; ...but this second store goes through an unrelated pointer argument on every
; iteration, and nothing here shows that %MayAlias is distinct from %Base.
81 store i8 42, i8* %MayAlias
82 %inc = add nsw i64 %i.011, 1
83 %exitcond = icmp eq i64 %inc, %Size
84 br i1 %exitcond, label %for.end, label %for.body
86 for.end: ; preds = %entry
94 ;; TODO: We should be able to promote this memset. Not yet though.
95 define void @test4(i8* %Base) nounwind ssp {
96 bb.nph: ; preds = %entry
; Note: despite its name, %Base100 is %Base + 1000. The loop below only writes
; bytes 0..99 of %Base (it exits when %indvar.next reaches 100).
97 %Base100 = getelementptr i8* %Base, i64 1000
100 for.body: ; preds = %bb.nph, %for.body
101 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
102 %I.0.014 = getelementptr i8* %Base, i64 %indvar
103 store i8 0, i8* %I.0.014, align 1
105 ;; Store beyond the range of the memset; should be safe to promote.
106 store i8 42, i8* %Base100
108 %indvar.next = add i64 %indvar, 1
109 %exitcond = icmp eq i64 %indvar.next, 100
110 br i1 %exitcond, label %for.end, label %for.body
112 for.end: ; preds = %for.body, %entry
; NOTE(review): the two directives below carry a -TODO suffix, so presumably
; FileCheck does not enforce them yet; confirm before relying on them.
115 ; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i32 1, i1 false)
116 ; CHECK-TODO-NOT: store
119 ; This can't be promoted to a memset: the stored value is loop-variant.
120 define void @test5(i8* %Base, i64 %Size) nounwind ssp {
121 bb.nph: ; preds = %entry
124 for.body: ; preds = %bb.nph, %for.body
125 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
126 %I.0.014 = getelementptr i8* %Base, i64 %indvar
; %V is the low byte of the induction variable, so it changes every iteration.
128 %V = trunc i64 %indvar to i8
129 store i8 %V, i8* %I.0.014, align 1
130 %indvar.next = add i64 %indvar, 1
131 %exitcond = icmp eq i64 %indvar.next, %Size
132 br i1 %exitcond, label %for.end, label %for.body
134 for.end: ; preds = %for.body, %entry
; Byte-wise copy between two separate 10000-byte stack allocations: each
; iteration loads %Base[%indvar] and stores it to %Dest[%indvar]. The expected
; output is a single llvm.memcpy of %Size bytes from %Base to %Dest.
143 define void @test6(i64 %Size) nounwind ssp {
145 %Base = alloca i8, i32 10000
146 %Dest = alloca i8, i32 10000
149 for.body: ; preds = %bb.nph, %for.body
150 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
151 %I.0.014 = getelementptr i8* %Base, i64 %indvar
152 %DestI = getelementptr i8* %Dest, i64 %indvar
153 %V = load i8* %I.0.014, align 1
154 store i8 %V, i8* %DestI, align 1
155 %indvar.next = add i64 %indvar, 1
156 %exitcond = icmp eq i64 %indvar.next, %Size
157 br i1 %exitcond, label %for.end, label %for.body
159 for.end: ; preds = %for.body, %entry
162 ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false)
168 ; This is a loop whose header holds only the phi and an unconditional branch;
169 ; the store lives in the block that follows. This shouldn't perturb us.
170 define void @test7(i8* %Base, i64 %Size) nounwind ssp {
171 bb.nph: ; preds = %entry
174 for.body: ; preds = %bb.nph, %for.body.cont
175 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
176 br label %for.body.cont
; Zero store, increment and exit test all happen after the header block.
178 %I.0.014 = getelementptr i8* %Base, i64 %indvar
179 store i8 0, i8* %I.0.014, align 1
180 %indvar.next = add i64 %indvar, 1
181 %exitcond = icmp eq i64 %indvar.next, %Size
182 br i1 %exitcond, label %for.end, label %for.body
184 for.end: ; preds = %for.body, %entry
187 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
191 ; This loop should not be transformed: it only executes one iteration.
192 define void @test8(i64* %Ptr, i64 %Size) nounwind ssp {
193 bb.nph: ; preds = %entry
196 for.body: ; preds = %bb.nph, %for.body
197 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
198 %PI = getelementptr i64* %Ptr, i64 %indvar
199 store i64 0, i64 *%PI
200 %indvar.next = add i64 %indvar, 1
; The exit bound is the constant 1, so the loop leaves after the first pass.
201 %exitcond = icmp eq i64 %indvar.next, 1
202 br i1 %exitcond, label %for.end, label %for.body
204 for.end: ; preds = %for.body, %entry
; The original store is expected to survive in the output.
207 ; CHECK: store i64 0, i64* %PI
; Opaque external function; test9 passes %Base to it and stores through the
; pointer it returns.
210 declare i8* @external(i8*)
212 ;; This cannot be transformed into a memcpy, because the read-from location is
213 ;; mutated by the loop.
214 define void @test9(i64 %Size) nounwind ssp {
216 %Base = alloca i8, i32 10000
217 %Dest = alloca i8, i32 10000
; %Base is handed to @external, so the returned pointer may refer to it.
219 %BaseAlias = call i8* @external(i8* %Base)
222 for.body: ; preds = %bb.nph, %for.body
223 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
224 %I.0.014 = getelementptr i8* %Base, i64 %indvar
225 %DestI = getelementptr i8* %Dest, i64 %indvar
226 %V = load i8* %I.0.014, align 1
227 store i8 %V, i8* %DestI, align 1
229 ;; This store can clobber the input.
230 store i8 4, i8* %BaseAlias
232 %indvar.next = add i64 %indvar, 1
233 %exitcond = icmp eq i64 %indvar.next, %Size
234 br i1 %exitcond, label %for.end, label %for.body
236 for.end: ; preds = %for.body, %entry
239 ; CHECK-NOT: llvm.memcpy
243 ; A two-dimensional nested loop should be promoted to one big memset.
; The byte index is %i.04 * 100 + %j.02 and both loops run until their counter
; reaches 100, so the stores cover 100 * 100 = 10000 consecutive bytes of %X.
244 define void @test10(i8* %X) nounwind ssp {
; Outer loop header: %i.04 is the outer counter.
248 bb.nph: ; preds = %entry, %for.inc10
249 %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
; Inner loop: %j.02 is the inner counter.
252 for.body5: ; preds = %for.body5, %bb.nph
253 %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]
254 %mul = mul nsw i32 %i.04, 100
255 %add = add nsw i32 %j.02, %mul
256 %idxprom = sext i32 %add to i64
257 %arrayidx = getelementptr inbounds i8* %X, i64 %idxprom
258 store i8 0, i8* %arrayidx, align 1
259 %inc = add nsw i32 %j.02, 1
260 %cmp4 = icmp eq i32 %inc, 100
261 br i1 %cmp4, label %for.inc10, label %for.body5
263 for.inc10: ; preds = %for.body5
264 %inc12 = add nsw i32 %i.04, 1
265 %cmp = icmp eq i32 %inc12, 100
266 br i1 %cmp, label %for.end13, label %bb.nph
268 for.end13: ; preds = %for.inc10
272 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false)
277 ; On darwin10 (which is the triple in this .ll file) this loop can be turned
278 ; into a memset_pattern call.
; The loop stores the i32 value 1 into 10000 consecutive elements of %P. The
; bytes of that value are not all equal, so a plain byte memset cannot express it.
280 define void @test11_pattern(i32* nocapture %P) nounwind ssp {
284 for.body: ; preds = %entry, %for.body
285 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
286 %arrayidx = getelementptr i32* %P, i64 %indvar
287 store i32 1, i32* %arrayidx, align 4
288 %indvar.next = add i64 %indvar, 1
289 %exitcond = icmp eq i64 %indvar.next, 10000
290 br i1 %exitcond, label %for.end, label %for.body
292 for.end: ; preds = %for.body
294 ; CHECK: @test11_pattern
296 ; CHECK-NEXT: bitcast
297 ; CHECK-NEXT: memset_pattern
302 ; Store of null should turn into memset of zero.
; 10000 pointer slots are written; with the 64-bit pointers declared in the
; target datalayout that is 10000 * 8 = 80000 bytes, matching the expected call.
303 define void @test12(i32** nocapture %P) nounwind ssp {
307 for.body: ; preds = %entry, %for.body
308 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
309 %arrayidx = getelementptr i32** %P, i64 %indvar
310 store i32* null, i32** %arrayidx, align 4
311 %indvar.next = add i64 %indvar, 1
312 %exitcond = icmp eq i64 %indvar.next, 10000
313 br i1 %exitcond, label %for.end, label %for.body
315 for.end: ; preds = %for.body
319 ; CHECK-NEXT: bitcast
320 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %P1, i8 0, i64 80000, i32 4, i1 false)
327 ; This store-of-address loop can be turned into a memset_pattern call.
; Each of the 10000 iterations stores the address of the global @G into the
; next pointer slot of %P.
329 define void @test13_pattern(i32** nocapture %P) nounwind ssp {
333 for.body: ; preds = %entry, %for.body
334 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
335 %arrayidx = getelementptr i32** %P, i64 %indvar
336 store i32* @G, i32** %arrayidx, align 4
337 %indvar.next = add i64 %indvar, 1
338 %exitcond = icmp eq i64 %indvar.next, 10000
339 br i1 %exitcond, label %for.end, label %for.body
341 for.end: ; preds = %for.body
343 ; CHECK: @test13_pattern
345 ; CHECK-NEXT: bitcast
346 ; CHECK-NEXT: memset_pattern
353 ; PR9815 - This is a partial overlap case that cannot be safely transformed
355 @g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
357 define i32 @test14() nounwind {
; The loop runs for %tmp5 = 0 and 1. Each iteration loads @g_50[%tmp5 + 4] and
; stores it to @g_50[%tmp5 + 5], so the second iteration reads the element that
; the first iteration just wrote.
361 for.body: ; preds = %for.body, %entry
362 %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
363 %add = add nsw i32 %tmp5, 4
364 %idxprom = sext i32 %add to i64
365 %arrayidx = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom
366 %tmp2 = load i32* %arrayidx, align 4
367 %add4 = add nsw i32 %tmp5, 5
368 %idxprom5 = sext i32 %add4 to i64
369 %arrayidx6 = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom5
370 store i32 %tmp2, i32* %arrayidx6, align 4
371 %inc = add nsw i32 %tmp5, 1
372 %cmp = icmp slt i32 %inc, 2
373 br i1 %cmp, label %for.body, label %for.end
375 for.end: ; preds = %for.body
; Reads back the last element, which the second iteration stored to.
376 %tmp8 = load i32* getelementptr inbounds ([7 x i32]* @g_50, i32 0, i64 6), align 4